Example #1
    def __init__(self, config):
        super(A3C, self).__init__(config)
        self.num_processes = multiprocessing.cpu_count()
        self.worker_processes = max(1, self.num_processes - 2)
        print(
            f"Running {self.worker_processes} worker processes plus an optimizer process and the main process"
        )

        self.actor_critic = DRRLnet(10,
                                    10,
                                    self.action_size,
                                    n_f_conv1=12,
                                    n_f_conv2=12,
                                    att_emb_size=32,
                                    n_heads=2,
                                    n_att_stack=1,
                                    n_fc_layers=4,
                                    pad=True,
                                    baseline_mode=False,
                                    n_baseMods=3)
        self.actor_critic_optimizer = SharedAdam(
            self.actor_critic.parameters(),
            lr=self.hyperparameters["learning_rate"],
            eps=1e-4)
        self.actor_critic_optimizer.zero_grad()
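
All of these examples construct the shared network's optimizer with SharedAdam, whose definition is not shown on this page. As a rough sketch only, assuming the common A3C pattern of pre-allocating Adam's state and moving it into shared memory (as popularised by ikostrikov/pytorch-a3c; the exact class used here may differ):

# Hedged sketch of a SharedAdam-style optimizer (an assumption, not the exact class
# imported by these examples): Adam whose per-parameter state lives in shared memory
# so that every worker process steps the same statistics.
import math
import torch


class SharedAdam(torch.optim.Adam):
    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0):
        super(SharedAdam, self).__init__(params, lr=lr, betas=betas, eps=eps,
                                         weight_decay=weight_decay)
        for group in self.param_groups:
            for p in group['params']:
                state = self.state[p]
                state['step'] = torch.zeros(1)
                state['exp_avg'] = torch.zeros_like(p.data)
                state['exp_avg_sq'] = torch.zeros_like(p.data)

    def share_memory(self):
        # Called once by the parent process before the workers start.
        for group in self.param_groups:
            for p in group['params']:
                state = self.state[p]
                state['step'].share_memory_()
                state['exp_avg'].share_memory_()
                state['exp_avg_sq'].share_memory_()

    def step(self, closure=None):
        # Plain Adam update written against the pre-allocated shared state.
        loss = closure() if closure is not None else None
        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data
                state = self.state[p]
                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                beta1, beta2 = group['betas']
                state['step'] += 1
                if group['weight_decay'] != 0:
                    grad = grad.add(p.data, alpha=group['weight_decay'])
                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
                denom = exp_avg_sq.sqrt().add_(group['eps'])
                bias_correction1 = 1 - beta1 ** state['step'].item()
                bias_correction2 = 1 - beta2 ** state['step'].item()
                step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1
                p.data.addcdiv_(exp_avg, denom, value=-step_size)
        return loss
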
Example #2

    def __init__(self, config, agent_name_=agent_name):
        super(A3C, self).__init__(config, agent_name_=agent_name_)
        self.num_processes = multiprocessing.cpu_count()
        self.worker_processes = max(1, self.num_processes - 2)
        self.actor_critic = self.create_NN(input_dim=self.state_size, output_dim=[self.action_size, 1])
        self.actor_critic_optimizer = SharedAdam(self.actor_critic.parameters(), lr=self.hyperparameters["learning_rate"], eps=1e-4)

        self.wandb_watch(self.actor_critic, log_freq=self.config.wandb_model_log_freq)
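
Example #2 also calls a wandb_watch helper that is not shown on this page. A minimal, hedged sketch of what such a helper might do with the Weights & Biases API (assuming wandb is installed and a run has been initialised; the real Base_Agent method may differ):

# Hypothetical wandb_watch helper (an assumption; the real Base_Agent method is not shown here).
import wandb

def wandb_watch(self, model, log_freq=100):
    # Log gradients and parameter histograms for the shared actor-critic network.
    wandb.watch(model, log="all", log_freq=log_freq)
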
Example #3

    def __init__(self, config):
        super(A3C, self).__init__(config)
        self.num_processes = multiprocessing.cpu_count()
        self.worker_processes = max(1, self.num_processes - 2)
        self.actor_critic = self.create_NN(input_dim=self.state_size,
                                           output_dim=[self.action_size, 1])
        self.actor_critic_optimizer = SharedAdam(
            self.actor_critic.parameters(),
            lr=self.hyperparameters["learning_rate"])
Example #4

    def __init__(self, config):
        super(A3C, self).__init__(config)
        self.num_processes = multiprocessing.cpu_count()
        self.worker_processes = max(1, self.num_processes - 2)

        model_path = self.config.model_path if self.config.model_path else 'Models'
        self.actor_critic_path = os.path.join(
            model_path, "{}_actor_critic.pt".format(self.agent_name))
        self.actor_critic = self.create_NN(input_dim=self.state_size,
                                           output_dim=[self.action_size, 1])
        if self.config.load_model: self.locally_load_policy()
        self.actor_critic_optimizer = SharedAdam(
            self.actor_critic.parameters(),
            lr=self.hyperparameters["learning_rate"],
            eps=1e-4)
Example #5
class A3C(Base_Agent):
    """Actor critic A3C algorithm from deepmind paper https://arxiv.org/pdf/1602.01783.pdf"""
    agent_name = "A3C"

    def __init__(self, config):
        super(A3C, self).__init__(config)
        self.num_processes = multiprocessing.cpu_count()
        self.worker_processes = max(1, self.num_processes - 2)
        self.actor_critic = self.create_NN(input_dim=self.state_size,
                                           output_dim=[self.action_size, 1])
        self.actor_critic_optimizer = SharedAdam(
            self.actor_critic.parameters(),
            lr=self.hyperparameters["learning_rate"],
            eps=1e-4)

    def run_n_episodes(self):
        """Runs game to completion n times and then summarises results and saves model (if asked to)"""
        start = time.time()
        results_queue = Queue()
        gradient_updates_queue = Queue()
        episode_number = multiprocessing.Value('i', 0)
        self.optimizer_lock = multiprocessing.Lock()
        episodes_per_process = int(
            self.config.num_episodes_to_run / self.worker_processes) + 1
        processes = []
        self.actor_critic.share_memory()
        self.actor_critic_optimizer.share_memory()

        optimizer_worker = multiprocessing.Process(
            target=self.update_shared_model, args=(gradient_updates_queue, ))
        optimizer_worker.start()

        for process_num in range(self.worker_processes):
            worker = Actor_Critic_Worker(
                process_num, copy.deepcopy(self.environment),
                self.actor_critic, episode_number, self.optimizer_lock,
                self.actor_critic_optimizer, self.config, episodes_per_process,
                self.hyperparameters["epsilon_decay_rate_denominator"],
                self.action_size, self.action_types, results_queue,
                copy.deepcopy(self.actor_critic), gradient_updates_queue)
            worker.start()
            processes.append(worker)
        self.print_results(episode_number, results_queue)
        for worker in processes:
            worker.join()
        optimizer_worker.kill()

        time_taken = time.time() - start
        return self.game_full_episode_scores, self.rolling_results, time_taken

    def print_results(self, episode_number, results_queue):
        """Worker that prints out results as they get put into a queue"""
        while True:
            with episode_number.get_lock():
                carry_on = episode_number.value < self.config.num_episodes_to_run
            if carry_on:
                if not results_queue.empty():
                    self.total_episode_score_so_far = results_queue.get()
                    self.save_and_print_result()
            else:
                break

    def update_shared_model(self, gradient_updates_queue):
        """Worker that updates the shared model with gradients as they get put into the queue"""
        while True:
            gradients = gradient_updates_queue.get()
            with self.optimizer_lock:
                self.actor_critic_optimizer.zero_grad()
                for grads, params in zip(gradients,
                                         self.actor_critic.parameters()):
                    params._grad = grads  # maybe need to do grads.clone()
                self.actor_critic_optimizer.step()
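
update_shared_model above expects each worker to push a list of gradient tensors onto gradient_updates_queue, one entry per parameter and in the same order as self.actor_critic.parameters(). The Actor_Critic_Worker class is not shown on this page; the producing side of that queue protocol might look roughly like the sketch below (the function name and the way the loss is obtained are illustrative assumptions):

# Illustrative sketch of the worker side of the gradient-queue protocol
# (an assumption; the real Actor_Critic_Worker is not shown on this page).
def send_gradients(local_model, loss, gradient_updates_queue):
    local_model.zero_grad()     # clear stale gradients on the worker's local copy
    loss.backward()             # compute fresh gradients for this rollout
    gradients = [param.grad.clone() for param in local_model.parameters()]
    gradient_updates_queue.put(gradients)   # consumed by update_shared_model above
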
Example #6

class A3C(Base_Agent):
    """Actor-critic A3C algorithm from the DeepMind paper https://arxiv.org/pdf/1602.01783.pdf"""
    agent_name = "A3C"

    def __init__(self, config):
        super(A3C, self).__init__(config)
        self.num_processes = multiprocessing.cpu_count()
        self.worker_processes = max(1, self.num_processes - 2)

        model_path = self.config.model_path if self.config.model_path else 'Models'
        self.actor_critic_path = os.path.join(
            model_path, "{}_actor_critic.pt".format(self.agent_name))
        self.actor_critic = self.create_NN(input_dim=self.state_size,
                                           output_dim=[self.action_size, 1])
        if self.config.load_model: self.locally_load_policy()
        self.actor_critic_optimizer = SharedAdam(
            self.actor_critic.parameters(),
            lr=self.hyperparameters["learning_rate"],
            eps=1e-4)

    def run_n_episodes(self):
        """Runs game to completion n times and then summarises results and saves model (if asked to)"""
        start = time.time()
        results_queue = Queue()
        gradient_updates_queue = Queue()
        episode_number = multiprocessing.Value('i', 0)
        self.optimizer_lock = multiprocessing.Lock()
        episodes_per_process = int(
            self.config.num_episodes_to_run / self.worker_processes) + 1
        processes = []
        self.actor_critic.share_memory()
        self.actor_critic_optimizer.share_memory()

        optimizer_worker = multiprocessing.Process(
            target=self.update_shared_model, args=(gradient_updates_queue, ))
        optimizer_worker.start()

        for process_num in range(self.worker_processes):
            worker = Actor_Critic_Worker(
                process_num, copy.deepcopy(self.environment),
                self.actor_critic, episode_number, self.optimizer_lock,
                self.actor_critic_optimizer, self.config, episodes_per_process,
                self.hyperparameters["epsilon_decay_rate_denominator"],
                self.action_size, self.action_types, results_queue,
                copy.deepcopy(self.actor_critic), gradient_updates_queue)
            worker.start()
            processes.append(worker)
        self.print_results(episode_number, results_queue)
        for worker in processes:
            worker.join()
        # optimizer_worker.kill()
        optimizer_worker.terminate()

        if self.config.save_model: self.locally_save_policy()
        time_taken = time.time() - start
        return self.game_full_episode_scores, self.rolling_results, time_taken

    def run_test(self,
                 show_whether_achieved_goal=True,
                 save_and_print_results=True):
        """Runs game to completion n times and then summarises results and saves model (if asked to)"""
        start = time.time()

        results_queue = Queue()
        gradient_updates_queue = Queue()
        episode_number = multiprocessing.Value('i', 0)
        self.optimizer_lock = multiprocessing.Lock()
        episodes_per_process = int(
            self.config.num_episodes_to_run / self.worker_processes) + 1
        processes = []
        self.actor_critic.share_memory()
        self.actor_critic_optimizer.share_memory()

        optimizer_worker = multiprocessing.Process(
            target=self.update_shared_model, args=(gradient_updates_queue, ))
        optimizer_worker.start()

        for process_num in range(1):
            worker = Actor_Critic_Worker(
                process_num, copy.deepcopy(self.environment),
                self.actor_critic, episode_number, self.optimizer_lock,
                self.actor_critic_optimizer, self.config, 1,
                self.hyperparameters["epsilon_decay_rate_denominator"],
                self.action_size, self.action_types, results_queue,
                copy.deepcopy(self.actor_critic), gradient_updates_queue)
            worker.start()
            processes.append(worker)
        self.print_results(episode_number, results_queue)
        for worker in processes:
            worker.join()
        # optimizer_worker.kill()
        optimizer_worker.terminate()

        if self.config.save_model: self.locally_save_policy()

        plt.cla()
        print("id", self.config.environment._position_history)
        self.config.environment.render_all()
        if self.config.run_test_path:
            plt.savefig(self.config.run_test_path.format(
                self.agent_name))  # , bbox_inches="tight")
        plt.show()
        time_taken = time.time() - start
        return self.game_full_episode_scores, self.rolling_results, time_taken

    def print_results(self, episode_number, results_queue):
        """Worker that prints out results as they get put into a queue"""
        while True:
            with episode_number.get_lock():
                carry_on = episode_number.value < self.config.num_episodes_to_run
            if carry_on:
                if not results_queue.empty():
                    self.total_episode_score_so_far = results_queue.get()
                    self.save_and_print_result()
            else:
                break

    def update_shared_model(self, gradient_updates_queue):
        """Worker that updates the shared model with gradients as they get put into the queue"""
        while True:
            gradients = gradient_updates_queue.get()
            with self.optimizer_lock:
                self.actor_critic_optimizer.zero_grad()
                for grads, params in zip(gradients,
                                         self.actor_critic.parameters()):
                    params._grad = grads  # maybe need to do grads.clone()
                self.actor_critic_optimizer.step()

    def locally_save_policy(self):
        """Saves the policy"""
        """保存策略,待添加"""
        torch.save(self.actor_critic.state_dict(), self.actor_critic_path)

    def locally_load_policy(self):
        """Loads a previously saved policy from actor_critic_path if one exists"""
        print("locally_load_policy")
        if os.path.isfile(self.actor_critic_path):
            print("load actor_critic_path")
            self.actor_critic.load_state_dict(
                torch.load(self.actor_critic_path))
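
locally_save_policy and locally_load_policy in Example #6 are a plain state_dict round trip. The standalone snippet below shows the same pattern in isolation; the network and path used here are placeholder assumptions:

# Standalone illustration of the save/load pattern used above
# (the network and path are placeholder assumptions).
import os
import torch
import torch.nn as nn

actor_critic = nn.Linear(4, 2)   # stand-in for the real actor-critic network
actor_critic_path = os.path.join("Models", "A3C_actor_critic.pt")

os.makedirs(os.path.dirname(actor_critic_path), exist_ok=True)
torch.save(actor_critic.state_dict(), actor_critic_path)        # locally_save_policy

if os.path.isfile(actor_critic_path):                           # locally_load_policy
    actor_critic.load_state_dict(torch.load(actor_critic_path))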