Example 1
 def bootstrap(self, model_paths, env, mixture):
     if np.random.rand() < self.retraining_prob:
         path = np.random.choice(model_paths, 1, p=mixture)[0]
         path = glob.glob(path + "/*.zip")[0]
         return helper.loadAgent(path, self.model_type)
     else:
         return helper.make_new_model(self.model_type,
                                      self.policy,
                                      env,
                                      n_steps=self.training_steps)
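
The mixture argument is treated as a probability vector over model_paths, so the caller is expected to normalise it before sampling. A minimal usage sketch, assuming a hypothetical PNM instance pnm and placeholder model directories:

    # Hypothetical usage sketch; paths, weights and the `pnm` instance are placeholders.
    import numpy as np

    model_paths = ["models/pelican_00", "models/pelican_01", "models/pelican_02"]
    raw_weights = np.array([2.0, 0.0, 1.0])     # e.g. unnormalised weights from an LP solution
    mixture = raw_weights / raw_weights.sum()   # np.random.choice needs probabilities summing to 1
    model = pnm.bootstrap(model_paths, pnm.pelican_env, mixture)
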
Example 2
 def compute_initial_payoffs(self):
     # If multiple initial entries were appended together as one list, unwrap it
     if len(self.initial_pelicans) > 0:
         self.pelicans = self.pelicans[0]
     if len(self.initial_panthers) > 0:
         self.panthers = self.panthers[0]
     # If this is the first iteration and we start from initial models, we need
     # to build the corresponding payoff matrix entries.
     # The last model of each set is left out (it is added in the normal cycle flow).
     # The sets may start with different numbers of agents, so zip_longest pads the shorter one.
     for j, (pelican, panther) in enumerate(
             itertools.zip_longest(self.pelicans[:-1], self.panthers[:-1])):
         if pelican is not None:
             path = glob.glob(pelican + "/*.zip")[0]
             self.pelican_model = helper.loadAgent(path, self.model_type)
         else:
             self.pelican_model = None
         if panther is not None:
             path = glob.glob(panther + "/*.zip")[0]
             self.panther_model = helper.loadAgent(path, self.model_type)
         else:
             self.panther_model = None
         self.compute_payoff_matrix(
             self.pelicans[:min(j + 1, len(self.pelicans))],
             self.panthers[:min(j + 1, len(self.panthers))])
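
Because the two agent sets can start with different sizes, itertools.zip_longest pads the shorter list with None, which is why both branches above guard against a missing path. A small illustration with made-up directory names:

    import itertools

    pelicans = ["pelican_0", "pelican_1", "pelican_2"]   # hypothetical model directories
    panthers = ["panther_0"]
    print(list(itertools.zip_longest(pelicans[:-1], panthers[:-1])))
    # -> [('pelican_0', None), ('pelican_1', None)]
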
Example 3
    def run_pnm(self):

        panther_agent_filepath, pelican_agent_filepath = self.initialAgents()

        # Initialize previous NE values for the stopping criterion
        value_to_pelican = 0.
        mixture_pelicans = np.array([1.])
        mixture_panthers = np.array([1.])

        # Create DataFrames for plotting purposes
        df_cols = [
            "NE_Payoff", "Pelican_BR_Payoff", "Panther_BR_Payoff",
            "Pelican_supp_size", "Panther_supp_size"
        ]
        df = pd.DataFrame(columns=df_cols)
        # Second DataFrame for periodic, more rigorous exploitability checks
        exploit_df_cols = [
            "iter", "NE_Payoff", "Pelican_BR_Payoffs", "Panther_BR_Payoffs"
        ]
        exploit_df = pd.DataFrame(columns=exploit_df_cols)

        # Train best responses until a Nash equilibrium is found or max_pnm_iterations is reached
        logger.info('Parallel Nash Memory (PNM)')
        for self.pnm_iteration in range(self.max_pnm_iterations):
            start = time.time()

            logger.info(
                "*********************************************************")
            logger.info('PNM iteration ' + str(self.pnm_iteration + 1) +
                        ' of ' + str(self.max_pnm_iterations))
            logger.info(
                "*********************************************************")

            self.pelicans.append(pelican_agent_filepath)
            self.panthers.append(panther_agent_filepath)

            if self.pnm_iteration == 0:
                self.compute_initial_payoffs()

            # Computing the payoff matrices and solving the corresponding LPs
            # Only compute the pelican payoffs; the panther payoffs are the negated transpose (the game is zero-sum)
            logger.info('Computing payoffs and mixtures')
            self.compute_payoff_matrix(self.pelicans, self.panthers)
            logger.info("=================================================")
            logger.info("New matrix game:")
            logger.info("As numpy array:")
            logger.info('\n' + str(self.payoffs))
            logger.info("As dataframe:")
            tmp_df = pd.DataFrame(self.payoffs).rename_axis(
                'Pelican', axis=0).rename_axis('Panther', axis=1)
            logger.info('\n' + str(tmp_df))

            # save payoff matrix
            np.save(
                '%s/payoffs_%d.npy' %
                (self.pnm_logs_exp_path, self.pnm_iteration), self.payoffs)

            def get_support_size(mixture):
                # return size of the support of mixed strategy mixture
                return sum([1 if m > 0 else 0 for m in mixture])

            # Check whether a stable NE has been found; if so, we are done (after updating the DataFrame)
            if self.pnm_iteration > 0:
                # Both BR payoffs (against the previous iteration's NE), expressed as payoff to the pelican
                br_value_pelican = np.dot(mixture_panthers,
                                          self.payoffs[-1, :-1])
                br_value_panther = np.dot(mixture_pelicans,
                                          self.payoffs[:-1, -1])

                ssize_pelican = get_support_size(mixture_pelicans)
                ssize_panther = get_support_size(mixture_panthers)

                logger.info(
                    "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
                logger.info("\n\
                             Pelican BR payoff: %.3f,\n\
                             Value of Game: %.3f,\n\
                             Panther BR payoff: %.3f,\n\
                             Pelican Supp Size: %d,\n\
                             Panther Supp Size: %d,\n" %
                            (br_value_pelican, value_to_pelican,
                             br_value_panther, ssize_pelican, ssize_panther))
                logger.info(
                    "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
                values = dict(
                    zip(df_cols, [
                        value_to_pelican, br_value_pelican, br_value_panther,
                        ssize_pelican, ssize_panther
                    ]))
                df = pd.concat([df, pd.DataFrame([values])],
                               ignore_index=True)

                # Write to csv file
                df_path = os.path.join(
                    self.exp_path, 'values_iter_%02d.csv' % self.pnm_iteration)
                df.to_csv(df_path, index=False)
                helper.get_fig(df)
                fig_path = os.path.join(
                    self.exp_path, 'values_iter_%02d.pdf' % self.pnm_iteration)
                plt.savefig(fig_path)
                print("==========================================")
                print("WRITTEN VALUES DF TO CSV: %s" % df_path)
                print("==========================================")

                # here value_to_pelican is from the last time the subgame was solved
                if abs(br_value_pelican - value_to_pelican) < self.stopping_eps and\
                   abs(br_value_panther - value_to_pelican) < self.stopping_eps:

                    print('Stable Nash Equilibrium found')
                    break

            logger.info("SOLVING NEW GAME:")
            # solve game for pelican
            (mixture_pelicans,
             value_to_pelican) = lp_solve.solve_zero_sum_game(self.payoffs)
            # with np.printoptions(precision=3):
            logger.info(mixture_pelicans)
            mixture_pelicans /= np.sum(mixture_pelicans)
            # with np.printoptions(precision=3):
            logger.info("After normalisation:")
            logger.info(mixture_pelicans)
            np.save(
                '%s/mixture_pelicans_%d.npy' %
                (self.pnm_logs_exp_path, self.pnm_iteration), mixture_pelicans)

            # solve game for panther
            (mixture_panthers,
             value_panthers) = lp_solve.solve_zero_sum_game(-self.payoffs.transpose())
            # with np.printoptions(precision=3):
            logger.info(mixture_panthers)
            mixture_panthers /= np.sum(mixture_panthers)
            # with np.printoptions(precision=3):
            logger.info("After normalisation:")
            logger.info(mixture_panthers)
            np.save(
                '%s/mixture_panthers_%d.npy' %
                (self.pnm_logs_exp_path, self.pnm_iteration), mixture_panthers)

            # end of logging matrix game and solution
            logger.info("=================================================")

            # Train from scratch or retrain an existing model for pelican
            logger.info('Training pelican')

            self.pelican_model = self.bootstrap(self.pelicans,
                                                self.pelican_env,
                                                mixture_pelicans)

            pelican_agent_filepath = self.train_agent_against_mixture(
                'pelican', self.pelicans_tmp_exp_path, self.pelican_model,
                self.pelican_env, self.panthers, mixture_panthers,
                self.training_steps)

            # Train from scratch or retrain an existing model for panther
            logger.info('Training panther')

            self.panther_model = self.bootstrap(self.panthers,
                                                self.panther_env,
                                                mixture_panthers)

            panther_agent_filepath = self.train_agent_against_mixture(
                'panther', self.panthers_tmp_exp_path, self.panther_model,
                self.panther_env, self.pelicans, mixture_pelicans,
                self.training_steps)

            logger.info("PNM iteration lasted: %d seconds" %
                        (time.time() - start))

            if self.pnm_iteration > 0 and self.pnm_iteration % self.testing_interval == 0:
                # Find best pelican (protagonist) against panther (opponent) mixture
                candidate_pelican_rbbr_fpaths, candidate_pelican_rbbr_win_percentages = self.iter_train_against_mixture(
                    self.exploit_n_rbbrs,        # number of resource-bounded best responses
                    self.pelicans_tmp_exp_path,
                    self.pelican_model,          # driving_agent: the agent that we train
                    self.pelican_env,            # env: a single env or a SubprocVecEnv
                    self.pelicans,               # filepaths to existing models
                    mixture_pelicans,            # mixture for bootstrapping
                    self.panthers,               # opponent_policy_fpaths: the opponent's policies
                    mixture_panthers)            # opponent_mixture

                logger.info("################################################")
                logger.info(
                    'candidate_pelican_rbbr_win_percentages: %s' %
                    np.round(candidate_pelican_rbbr_win_percentages, 2))
                logger.info("################################################")
                br_values_pelican = np.round(
                    candidate_pelican_rbbr_win_percentages, 2).tolist()

                candidate_panther_rbbr_fpaths, candidate_panther_rbbr_win_percentages = self.iter_train_against_mixture(
                    self.exploit_n_rbbrs,        # number of resource-bounded best responses
                    self.panthers_tmp_exp_path,
                    self.panther_model,          # driving_agent: the agent that we train
                    self.panther_env,            # env: a single env or a SubprocVecEnv
                    self.panthers,               # filepaths to existing models
                    mixture_panthers,            # mixture for bootstrapping
                    self.pelicans,               # opponent_policy_fpaths: the opponent's policies
                    mixture_pelicans)            # opponent_mixture

                logger.info("################################################")
                logger.info(
                    'candidate_panther_rbbr_win_percentages: %s' %
                    np.round(candidate_panther_rbbr_win_percentages, 2))
                logger.info("################################################")
                br_values_panther = [
                    1 - p for p in np.round(
                        candidate_panther_rbbr_win_percentages, 2)
                ]

                values = dict(
                    zip(exploit_df_cols, [
                        self.pnm_iteration, value_to_pelican,
                        br_values_pelican, br_values_panther
                    ]))
                exploit_df = pd.concat([exploit_df, pd.DataFrame([values])],
                                       ignore_index=True)

                # add medians
                exploit_df['pelican_median'] = exploit_df[
                    'Pelican_BR_Payoffs'].apply(np.median)
                exploit_df['panther_median'] = exploit_df[
                    'Panther_BR_Payoffs'].apply(np.median)

                # Write to csv file
                df_path = os.path.join(
                    self.exp_path,
                    'exploit_iter_%02d.csv' % self.pnm_iteration)

                tmp_df = exploit_df.set_index('iter')
                tmp_df.to_csv(df_path, index=True)

                helper.get_fig_with_exploit(df, tmp_df)
                fig_path = os.path.join(
                    self.exp_path,
                    'values_with_exploit_iter_%02d.pdf' % self.pnm_iteration)
                plt.savefig(fig_path)
                print("==========================================")
                print("WRITTEN EXPLOIT DF TO CSV: %s" % df_path)
                print("==========================================")

                if self.video_flag:
                    # Occasionally output useful things along the way
                    # Make videos
                    verbose = False
                    video_path = os.path.join(
                        self.exp_path,
                        'pelican_pnm_iter_%02d.mp4' % self.pnm_iteration)
                    basewidth, hsize = helper.make_video_VEC_ENV(
                        self.pelican_model,
                        self.pelican_env,
                        video_path,
                        fps=self.fps,
                        basewidth=self.basewidth,
                        n_steps=self.video_steps,
                        verbose=verbose)

                    video_path = os.path.join(
                        self.exp_path,
                        'panther_pnm_iter_%02d.mp4' % self.pnm_iteration)
                    basewidth, hsize = helper.make_video_VEC_ENV(
                        self.panther_model,
                        self.panther_env,
                        video_path,
                        fps=self.fps,
                        basewidth=self.basewidth,
                        n_steps=self.video_steps,
                        verbose=verbose)

        # Saving final mixture and corresponding agents
        logger.info("################################################")
        logger.info("Saving final pelican mixtures and agents:")
        support_pelicans = np.nonzero(mixture_pelicans)[0]
        mixture_pelicans = mixture_pelicans[support_pelicans]
        np.save(self.exp_path + '/final_mixture_pelicans.npy',
                mixture_pelicans)
        logger.info("Final pelican mixture saved to: %s" % self.exp_path +
                    '/final_mixture_pelicans.npy')
        print("mixture:")
        print(mixture_pelicans)
        for i, idx in enumerate(support_pelicans):
            self.pelican_model = helper.loadAgent(
                glob.glob(self.pelicans[idx] + "/*.zip")[0], self.model_type)
            self.pelican_model.set_env(self.pelican_env)
            agent_filepath, _, _ = helper.save_model_with_env_settings(
                self.pelicans_tmp_exp_path, self.pelican_model,
                self.model_type, self.pelican_env,
                self.basicdate + "_ps_" + str(i))
            logger.info("Saving  pelican %d to %s" % (i, agent_filepath))
        support_panthers = np.nonzero(mixture_panthers)[0]
        mixture_panthers = mixture_panthers[support_panthers]
        np.save(self.exp_path + '/final_mixture_panthers.npy',
                mixture_panthers)
        logger.info("Final panther mixture saved to: %s" % self.exp_path +
                    '/final_mixture_panthers.npy')
        for i, idx in enumerate(support_panthers):
            self.panther_model = helper.loadAgent(
                glob.glob(self.panthers[idx] + "/*.zip")[0], self.model_type)
            self.panther_model.set_env(self.panther_env)
            agent_filepath, _, _ = helper.save_model_with_env_settings(
                self.panthers_tmp_exp_path, self.panther_model,
                self.model_type, self.panther_env,
                self.basicdate + "_ps_" + str(i))

            logger.info("Saving  panther %d to %s" % (i, agent_filepath))