def bootstrap(self, model_paths, env, mixture):
    """Pick the model that the next training round starts from.

    With probability ``self.retraining_prob`` an already stored agent is
    sampled from ``model_paths`` (weighted by ``mixture``) and loaded from the
    first ``*.zip`` file found in its directory; otherwise a brand new model is
    created for ``env``.

    Args:
        model_paths: Directories of previously saved agents.
        env: Environment handed to the new model (unused when an existing
            agent is reloaded).
        mixture: Sampling probabilities, one per entry of ``model_paths``.

    Returns:
        Whatever ``helper.loadAgent`` / ``helper.make_new_model`` return.
    """
    retrain_existing = np.random.rand(1) < self.retraining_prob
    if not retrain_existing:
        # Train from scratch.
        return helper.make_new_model(self.model_type,
                                     self.policy,
                                     env,
                                     n_steps=self.training_steps)

    # Continue training one of the stored agents, drawn from the mixture.
    sampled_dir = np.random.choice(model_paths, 1, p=mixture)[0]
    checkpoint = glob.glob(sampled_dir + "/*.zip")[0]
    return helper.loadAgent(checkpoint, self.model_type)
def compute_initial_payoffs(self):
    """Build the payoff entries for the models present before the first iteration.

    When initial models were supplied, ``self.pelicans`` / ``self.panthers``
    are replaced by their first element (the initial models are presumably
    appended as one nested list -- confirm against the caller). Afterwards
    every agent except the last one of each set is loaded in turn and
    ``self.compute_payoff_matrix`` is called on the growing prefixes of both
    sets. The last agent of each set is left to the regular iteration flow.
    """
    # Unwrap the nested entry created when several initial models were
    # appended at once.
    if len(self.initial_pelicans) > 0:
        self.pelicans = self.pelicans[0]
    if len(self.initial_panthers) > 0:
        self.panthers = self.panthers[0]

    def _load_or_none(agent_dir):
        # zip_longest pads the shorter set with None: nothing to load then.
        if agent_dir is None:
            return None
        checkpoint = glob.glob(agent_dir + "/*.zip")[0]
        return helper.loadAgent(checkpoint, self.model_type)

    # The two sets may hold a different number of agents, hence zip_longest.
    earlier_agents = itertools.zip_longest(self.pelicans[:-1], self.panthers[:-1])
    for upto, (pelican_dir, panther_dir) in enumerate(earlier_agents, start=1):
        self.pelican_model = _load_or_none(pelican_dir)
        self.panther_model = _load_or_none(panther_dir)
        n_pelicans = min(upto, len(self.pelicans))
        n_panthers = min(upto, len(self.panthers))
        self.compute_payoff_matrix(self.pelicans[:n_pelicans],
                                   self.panthers[:n_panthers])
def run_pnm(self):
    """Run the Parallel Nash Memory (PNM) training loop.

    Each iteration appends the latest pelican/panther agents to the
    populations, recomputes the payoff matrix (rows: pelicans, columns:
    panthers, values from the pelican's point of view), logs and saves it,
    solves the zero-sum matrix game for both players and trains a new
    response for each side against the opponent's mixture. From the second
    iteration on, the payoffs of the newest agents against the previous
    equilibrium are recorded, written to CSV/PDF, and used as the stopping
    criterion (both within ``self.stopping_eps`` of the game value).
    Every ``self.testing_interval`` iterations a more thorough exploitability
    check is run via ``self.iter_train_against_mixture``.

    After the loop the supports of the final mixtures are saved together with
    the agents they refer to.

    Fixes relative to the previous version:
      * The final agents are looked up through the support indices, so the
        saved agents match the entries of the saved (filtered) mixtures.
      * The best-response payoffs weight the new pelican's row with the
        panther mixture and the new panther's column with the pelican
        mixture (they were swapped, which only had matching shapes when both
        populations had the same size).
      * ``DataFrame.append`` (removed in pandas 2.0) is no longer used.
    """
    panther_agent_filepath, pelican_agent_filepath = self.initialAgents()

    # Initialize old NE stuff for stopping criterion
    value_to_pelican = 0.
    mixture_pelicans = np.array([1.])
    mixture_panthers = np.array([1.])

    # Create DataFrames for plotting purposes. Rows are collected in plain
    # lists and the frames rebuilt from them.
    df_cols = ["NE_Payoff", "Pelican_BR_Payoff", "Panther_BR_Payoff", "Pelican_supp_size", "Panther_supp_size"]
    value_rows = []
    df = pd.DataFrame(columns=df_cols)
    # second df for periodic rigorous exploitability checks
    exploit_df_cols = ["iter", "NE_Payoff", "Pelican_BR_Payoffs", "Panther_BR_Payoffs"]
    exploit_rows = []

    def get_support_size(mixture):
        # return size of the support of mixed strategy mixture
        return sum(1 for m in mixture if m > 0)

    # Train best responses until Nash equilibrium is found or max_iterations are reached
    logger.info('Parallel Nash Memory (PNM)')
    for self.pnm_iteration in range(self.max_pnm_iterations):
        start = time.time()

        logger.info("*********************************************************")
        logger.info('PNM iteration ' + str(self.pnm_iteration + 1) + ' of ' + str(self.max_pnm_iterations))
        logger.info("*********************************************************")

        self.pelicans.append(pelican_agent_filepath)
        self.panthers.append(panther_agent_filepath)

        if self.pnm_iteration == 0:
            self.compute_initial_payoffs()

        # Computing the payoff matrices and solving the corresponding LPs
        # Only compute for pelican in the sparse env, that of panther is the
        # negative transpose (game is zero-sum)
        logger.info('Computing payoffs and mixtures')
        self.compute_payoff_matrix(self.pelicans, self.panthers)
        logger.info("=================================================")
        logger.info("New matrix game:")
        logger.info("As numpy array:")
        logger.info('\n' + str(self.payoffs))
        logger.info("As dataframe:")
        tmp_df = pd.DataFrame(self.payoffs).rename_axis('Pelican', axis=0).rename_axis('Panther', axis=1)
        logger.info('\n' + str(tmp_df))

        # save payoff matrix
        np.save('%s/payoffs_%d.npy' % (self.pnm_logs_exp_path, self.pnm_iteration), self.payoffs)

        # Check if we found a stable NE, in that case we are done (and fitting DF)
        if self.pnm_iteration > 0:
            # Both BR payoffs (against last time's NE) in terms of pelican payoff.
            # The newest pelican is the last row: weight its payoffs against the
            # previous panthers with the previous panther mixture. The newest
            # panther is the last column: weight it with the previous pelican
            # mixture.
            br_value_pelican = np.dot(mixture_panthers, self.payoffs[-1, :-1])
            br_value_panther = np.dot(mixture_pelicans, self.payoffs[:-1, -1])

            ssize_pelican = get_support_size(mixture_pelicans)
            ssize_panther = get_support_size(mixture_panthers)

            logger.info("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
            logger.info("\n Pelican BR payoff: %.3f,"
                        "\n Value of Game: %.3f,"
                        "\n Panther BR payoff: %.3f,"
                        "\n Pelican Supp Size: %d,"
                        "\n Panther Supp Size: %d,\n" % (
                            br_value_pelican,
                            value_to_pelican,
                            br_value_panther,
                            ssize_pelican,
                            ssize_panther
                        ))
            logger.info("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")

            values = dict(zip(df_cols, [value_to_pelican, br_value_pelican,
                                        br_value_panther, ssize_pelican,
                                        ssize_panther]))
            value_rows.append(values)
            df = pd.DataFrame(value_rows, columns=df_cols)

            # Write to csv file
            df_path = os.path.join(self.exp_path, 'values_iter_%02d.csv' % self.pnm_iteration)
            df.to_csv(df_path, index=False)
            helper.get_fig(df)
            fig_path = os.path.join(self.exp_path, 'values_iter_%02d.pdf' % self.pnm_iteration)
            plt.savefig(fig_path)
            print("==========================================")
            print("WRITTEN VALUES DF TO CSV: %s" % df_path)
            print("==========================================")

            # here value_to_pelican is from the last time the subgame was solved
            pelican_stable = abs(br_value_pelican - value_to_pelican) < self.stopping_eps
            panther_stable = abs(br_value_panther - value_to_pelican) < self.stopping_eps
            if pelican_stable and panther_stable:
                print('Stable Nash Equilibrium found')
                break

        logger.info("SOLVING NEW GAME:")
        # solve game for pelican
        (mixture_pelicans, value_to_pelican) = lp_solve.solve_zero_sum_game(self.payoffs)
        logger.info(mixture_pelicans)
        mixture_pelicans /= np.sum(mixture_pelicans)
        logger.info("After normalisation:")
        logger.info(mixture_pelicans)
        np.save('%s/mixture_pelicans_%d.npy' % (self.pnm_logs_exp_path, self.pnm_iteration), mixture_pelicans)

        # solve game for panther
        (mixture_panthers, value_panthers) = lp_solve.solve_zero_sum_game(-self.payoffs.transpose())
        logger.info(mixture_panthers)
        mixture_panthers /= np.sum(mixture_panthers)
        logger.info("After normalisation:")
        logger.info(mixture_panthers)
        np.save('%s/mixture_panthers_%d.npy' % (self.pnm_logs_exp_path, self.pnm_iteration), mixture_panthers)

        # end of logging matrix game and solution
        logger.info("=================================================")

        # Train from scratch or retrain an existing model for pelican
        logger.info('Training pelican')
        self.pelican_model = self.bootstrap(self.pelicans, self.pelican_env, mixture_pelicans)
        pelican_agent_filepath = self.train_agent_against_mixture('pelican',
                                                                  self.pelicans_tmp_exp_path,
                                                                  self.pelican_model,
                                                                  self.pelican_env,
                                                                  self.panthers,
                                                                  mixture_panthers,
                                                                  self.training_steps)

        # Train from scratch or retrain an existing model for panther
        logger.info('Training panther')
        self.panther_model = self.bootstrap(self.panthers, self.panther_env, mixture_panthers)
        panther_agent_filepath = self.train_agent_against_mixture('panther',
                                                                  self.panthers_tmp_exp_path,
                                                                  self.panther_model,
                                                                  self.panther_env,
                                                                  self.pelicans,
                                                                  mixture_pelicans,
                                                                  self.training_steps)

        logger.info("PNM iteration lasted: %d seconds" % (time.time() - start))

        if self.pnm_iteration > 0 and self.pnm_iteration % self.testing_interval == 0:
            # Find best pelican (protagonist) against panther (opponent) mixture
            candidate_pelican_rbbr_fpaths, candidate_pelican_rbbr_win_percentages = self.iter_train_against_mixture(
                self.exploit_n_rbbrs,        # Number of resource bounded best responses
                self.pelicans_tmp_exp_path,
                self.pelican_model,          # driving agent: the agent that we train
                self.pelican_env,            # env: can either be a single env or subvecproc
                self.pelicans,               # Filepaths to existing models
                mixture_pelicans,            # mixture for bootstrapping
                self.panthers,               # policies of the opponent of the driving agent
                mixture_panthers)            # opponent mixture

            logger.info("################################################")
            logger.info('candidate_pelican_rbbr_win_percentages: %s' % np.round(candidate_pelican_rbbr_win_percentages, 2))
            logger.info("################################################")
            br_values_pelican = np.round(candidate_pelican_rbbr_win_percentages, 2).tolist()

            candidate_panther_rbbr_fpaths, candidate_panther_rbbr_win_percentages = self.iter_train_against_mixture(
                self.exploit_n_rbbrs,        # Number of resource bounded best responses
                self.panthers_tmp_exp_path,
                self.panther_model,          # driving agent: the agent that we train
                self.panther_env,            # env: can either be a single env or subvecproc
                self.panthers,               # Filepaths to existing models
                mixture_panthers,            # mixture for bootstrapping
                self.pelicans,               # policies of the opponent of the driving agent
                mixture_pelicans)            # opponent mixture

            logger.info("################################################")
            logger.info('candidate_panther_rbbr_win_percentages: %s' % np.round(candidate_panther_rbbr_win_percentages, 2))
            logger.info("################################################")
            # Expressed as 1 - panther win percentage, as in the pelican column.
            br_values_panther = [1 - p for p in np.round(candidate_panther_rbbr_win_percentages, 2)]

            values = dict(zip(exploit_df_cols, [self.pnm_iteration,
                                                value_to_pelican,
                                                br_values_pelican,
                                                br_values_panther]))
            exploit_rows.append(values)
            exploit_df = pd.DataFrame(exploit_rows, columns=exploit_df_cols)

            # add medians
            exploit_df['pelican_median'] = exploit_df['Pelican_BR_Payoffs'].apply(np.median)
            exploit_df['panther_median'] = exploit_df['Panther_BR_Payoffs'].apply(np.median)

            # Write to csv file
            df_path = os.path.join(self.exp_path, 'exploit_iter_%02d.csv' % self.pnm_iteration)
            tmp_df = exploit_df.set_index('iter')
            tmp_df.to_csv(df_path, index=True)

            helper.get_fig_with_exploit(df, tmp_df)
            fig_path = os.path.join(self.exp_path, 'values_with_exploit_iter_%02d.pdf' % self.pnm_iteration)
            plt.savefig(fig_path)
            print("==========================================")
            print("WRITTEN EXPLOIT DF TO CSV: %s" % df_path)
            print("==========================================")

            # NOTE(review): the video block is assumed to belong to this
            # periodic branch ("occasionally") -- confirm against the
            # original indentation.
            if self.video_flag:
                # occasionally output useful things along the way
                # Make videos
                verbose = False
                video_path = os.path.join(self.exp_path, 'pelican_pnm_iter_%02d.mp4' % self.pnm_iteration)
                helper.make_video_VEC_ENV(self.pelican_model,
                                          self.pelican_env,
                                          video_path,
                                          fps=self.fps,
                                          basewidth=self.basewidth,
                                          n_steps=self.video_steps,
                                          verbose=verbose)

                video_path = os.path.join(self.exp_path, 'panther_pnm_iter_%02d.mp4' % self.pnm_iteration)
                helper.make_video_VEC_ENV(self.panther_model,
                                          self.panther_env,
                                          video_path,
                                          fps=self.fps,
                                          basewidth=self.basewidth,
                                          n_steps=self.video_steps,
                                          verbose=verbose)

    # Saving final mixture and corresponding agents
    logger.info("################################################")
    logger.info("Saving final pelican mixtures and agents:")
    support_pelicans = np.nonzero(mixture_pelicans)[0]
    mixture_pelicans = mixture_pelicans[support_pelicans]
    final_pelican_mixture_path = self.exp_path + '/final_mixture_pelicans.npy'
    np.save(final_pelican_mixture_path, mixture_pelicans)
    logger.info("Final pelican mixture saved to: %s" % final_pelican_mixture_path)
    # i is the position in the saved (filtered) mixture, idx the position of
    # the corresponding agent in the full population.
    for i, idx in enumerate(support_pelicans):
        self.pelican_model = helper.loadAgent(glob.glob(self.pelicans[idx] + "/*.zip")[0], self.model_type)
        agent_filepath, _, _ = helper.save_model_with_env_settings(self.pelicans_tmp_exp_path,
                                                                   self.pelican_model,
                                                                   self.model_type,
                                                                   self.pelican_env,
                                                                   self.basicdate + "_ps_" + str(i))
        logger.info("Saving pelican %d to %s" % (i, agent_filepath))

    support_panthers = np.nonzero(mixture_panthers)[0]
    mixture_panthers = mixture_panthers[support_panthers]
    final_panther_mixture_path = self.exp_path + '/final_mixture_panthers.npy'
    np.save(final_panther_mixture_path, mixture_panthers)
    logger.info("Final panther mixture saved to: %s" % final_panther_mixture_path)
    for i, idx in enumerate(support_panthers):
        self.panther_model = helper.loadAgent(glob.glob(self.panthers[idx] + "/*.zip")[0], self.model_type)
        agent_filepath, _, _ = helper.save_model_with_env_settings(self.panthers_tmp_exp_path,
                                                                   self.panther_model,
                                                                   self.model_type,
                                                                   self.panther_env,
                                                                   self.basicdate + "_ps_" + str(i))
        logger.info("Saving panther %d to %s" % (i, agent_filepath))