Example #1
    def process_trained_results(self) -> ResultDict:
        # Get learner outputs/stats from output queue.
        final_learner_info = {}
        learner_infos = []
        num_env_steps_trained = 0
        num_agent_steps_trained = 0

        for _ in range(self._learner_thread.outqueue.qsize()):
            if self._learner_thread.is_alive():
                (
                    env_steps,
                    agent_steps,
                    learner_results,
                ) = self._learner_thread.outqueue.get(timeout=0.001)
                num_env_steps_trained += env_steps
                num_agent_steps_trained += agent_steps
                if learner_results:
                    learner_infos.append(learner_results)
            else:
                raise RuntimeError("The learner thread died while training")
        if not learner_infos:
            final_learner_info = copy.deepcopy(self._learner_thread.learner_info)
        else:
            builder = LearnerInfoBuilder()
            for info in learner_infos:
                builder.add_learn_on_batch_results_multi_agent(info)
            final_learner_info = builder.finalize()

        # Update the steps trained counters.
        self._counters[NUM_ENV_STEPS_TRAINED] += num_env_steps_trained
        self._counters[NUM_AGENT_STEPS_TRAINED] += num_agent_steps_trained

        return final_learner_info
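
The method above drains whatever the learner thread has pushed onto its output queue since the last call, without blocking the driver, and raises if the thread has died. Below is a minimal, self-contained sketch of that producer/consumer handshake using only the standard library; FakeLearnerThread and drain() are invented names for illustration, not RLlib's actual LearnerThread API.

import queue
import threading
import time


class FakeLearnerThread(threading.Thread):
    """Stand-in for the learner thread: pushes (env_steps, agent_steps, stats) tuples."""

    def __init__(self):
        super().__init__(daemon=True)
        self.outqueue = queue.Queue()
        self.learner_info = {}  # Last stats; reused by the consumer when the queue is empty.

    def run(self):
        step = 0
        while True:  # Runs until the main process exits (daemon thread).
            step += 1
            stats = {"default_policy": {"learner_stats": {"loss": 1.0 / step}}}
            self.learner_info = stats
            self.outqueue.put((200, 200, stats))  # (env_steps, agent_steps, results)
            time.sleep(0.02)


def drain(thread):
    """Consumer loop: read qsize() items, raise if the producer thread died."""
    env_steps = agent_steps = 0
    infos = []
    for _ in range(thread.outqueue.qsize()):
        if not thread.is_alive():
            raise RuntimeError("The learner thread died while training")
        e, a, info = thread.outqueue.get(timeout=0.001)
        env_steps += e
        agent_steps += a
        if info:
            infos.append(info)
    return env_steps, agent_steps, infos


t = FakeLearnerThread()
t.start()
time.sleep(0.1)
print(drain(t))  # Roughly (1000, 1000, [~5 stats dicts]).
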
Example #2
    def training_iteration(self) -> ResultDict:
        # Shortcut.
        first_worker = self.workers.remote_workers()[0]

        # Run sampling and update steps on each worker in asynchronous fashion.
        sample_and_update_results = asynchronous_parallel_requests(
            remote_requests_in_flight=self.remote_requests_in_flight,
            actors=self.workers.remote_workers(),
            ray_wait_timeout_s=0.0,
            max_remote_requests_in_flight_per_actor=1,  # At most one in-flight request per worker.
            remote_fn=self._sample_and_train_torch_distributed,
        )

        # For all results collected:
        # - Update our counters and timers.
        # - Update the worker's global_vars.
        # - Build info dict using a LearnerInfoBuilder object.
        learner_info_builder = LearnerInfoBuilder(num_devices=1)
        steps_this_iter = 0
        for worker, results in sample_and_update_results.items():
            for result in results:
                steps_this_iter += result["env_steps"]
                self._counters[NUM_AGENT_STEPS_SAMPLED] += result["agent_steps"]
                self._counters[NUM_AGENT_STEPS_TRAINED] += result["agent_steps"]
                self._counters[NUM_ENV_STEPS_SAMPLED] += result["env_steps"]
                self._counters[NUM_ENV_STEPS_TRAINED] += result["env_steps"]
                self._timers[LEARN_ON_BATCH_TIMER].push(result["learn_on_batch_time"])
                self._timers[SAMPLE_TIMER].push(result["sample_time"])
            # Add partial learner info to builder object.
            learner_info_builder.add_learn_on_batch_results_multi_agent(result["info"])

            # Broadcast the local set of global vars.
            global_vars = {"timestep": self._counters[NUM_AGENT_STEPS_SAMPLED]}
            for worker in self.workers.remote_workers():
                worker.set_global_vars.remote(global_vars)

        self._counters[STEPS_TRAINED_THIS_ITER_COUNTER] = steps_this_iter

        # Sync down the weights from 1st remote worker (only if we have received
        # some results from it).
        # As with the sync up, this is not really needed unless the user is
        # reading the local weights.
        if (
            self.config["keep_local_weights_in_sync"]
            and first_worker in sample_and_update_results
        ):
            self.workers.local_worker().set_weights(
                ray.get(first_worker.get_weights.remote())
            )
        # Return the merged learner info results.
        new_learner_info = learner_info_builder.finalize()
        if new_learner_info:
            self._curr_learner_info = new_learner_info
        return self._curr_learner_info
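
asynchronous_parallel_requests() is the RLlib execution utility that keeps a bounded number of requests in flight per remote worker and returns only what has already finished. The sketch below imitates that contract with concurrent.futures threads instead of Ray actors; poll_workers and FakeWorker are illustrative names, not RLlib APIs.

import random
import time
from concurrent.futures import ThreadPoolExecutor


class FakeWorker:
    """Stand-in for a remote RolloutWorker: one sample + train step per call."""

    def sample_and_train(self):
        time.sleep(random.uniform(0.01, 0.05))
        return {"env_steps": 200, "agent_steps": 200, "info": {"loss": random.random()}}


def poll_workers(executor, workers, in_flight):
    """Launch at most one request per worker, then return whatever is already done."""
    # Fire a new request on every worker with nothing in flight
    # (the max_remote_requests_in_flight_per_actor=1 analogue).
    for w in workers:
        if w not in in_flight:
            in_flight[w] = executor.submit(w.sample_and_train)
    # Collect finished requests without waiting (the ray_wait_timeout_s=0.0 analogue).
    ready = {}
    for w, fut in list(in_flight.items()):
        if fut.done():
            ready[w] = [fut.result()]
            del in_flight[w]
    return ready


workers = [FakeWorker() for _ in range(3)]
in_flight = {}
with ThreadPoolExecutor(max_workers=3) as pool:
    for _ in range(10):  # A few "training iterations".
        for worker, results in poll_workers(pool, workers, in_flight).items():
            for result in results:
                print(result["env_steps"], result["info"]["loss"])  # Update counters/timers here.
        time.sleep(0.02)
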
Example #3
    def training_step(self) -> ResultDict:
        # Shortcut.
        first_worker = self.workers.remote_workers()[0]

        self._ddppo_worker_manager.call_on_all_available(
            self._sample_and_train_torch_distributed
        )
        sample_and_update_results = self._ddppo_worker_manager.get_ready()

        # For all results collected:
        # - Update our counters and timers.
        # - Update the worker's global_vars.
        # - Build info dict using a LearnerInfoBuilder object.
        learner_info_builder = LearnerInfoBuilder(num_devices=1)
        for worker, results in sample_and_update_results.items():
            for result in results:
                self._counters[NUM_AGENT_STEPS_SAMPLED] += result["agent_steps"]
                self._counters[NUM_AGENT_STEPS_TRAINED] += result["agent_steps"]
                self._counters[NUM_ENV_STEPS_SAMPLED] += result["env_steps"]
                self._counters[NUM_ENV_STEPS_TRAINED] += result["env_steps"]
                self._timers[LEARN_ON_BATCH_TIMER].push(result["learn_on_batch_time"])
                self._timers[SAMPLE_TIMER].push(result["sample_time"])
            # Add partial learner info to builder object.
            learner_info_builder.add_learn_on_batch_results_multi_agent(result["info"])

            # Broadcast the local set of global vars.
            global_vars = {"timestep": self._counters[NUM_AGENT_STEPS_SAMPLED]}
            for worker in self.workers.remote_workers():
                worker.set_global_vars.remote(global_vars)

        # Sync down the weights from 1st remote worker (only if we have received
        # some results from it).
        # As with the sync up, this is not really needed unless the user is
        # reading the local weights.
        if (
            self.config["keep_local_weights_in_sync"]
            and first_worker in sample_and_update_results
        ):
            self.workers.local_worker().set_weights(
                ray.get(first_worker.get_weights.remote())
            )
        # Return the merged learner info results.
        new_learner_info = learner_info_builder.finalize()
        if new_learner_info:
            self._curr_learner_info = new_learner_info
        return self._curr_learner_info
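
In both this variant and Example #2, the per-worker stats are funneled into a LearnerInfoBuilder, and the finalized dict replaces self._curr_learner_info only when it is non-empty. As a rough illustration of what that merge step does for plain numeric fields (RLlib's builder has more machinery, e.g. multi-GPU tower reduction), here is a toy stand-in that simply averages learner_stats per policy; ToyLearnerInfoBuilder is an invented name, not an RLlib class:

from collections import defaultdict


class ToyLearnerInfoBuilder:
    """Averages numeric learner_stats per policy across all added results."""

    def __init__(self):
        self._per_policy = defaultdict(lambda: defaultdict(list))

    def add(self, multi_agent_results):
        # multi_agent_results: {policy_id: {"learner_stats": {name: value}}}
        for policy_id, info in multi_agent_results.items():
            for name, value in info.get("learner_stats", {}).items():
                self._per_policy[policy_id][name].append(value)

    def finalize(self):
        return {
            policy_id: {
                "learner_stats": {
                    name: sum(values) / len(values) for name, values in stats.items()
                }
            }
            for policy_id, stats in self._per_policy.items()
        }


builder = ToyLearnerInfoBuilder()
builder.add({"default_policy": {"learner_stats": {"policy_loss": 0.5, "vf_loss": 1.5}}})
builder.add({"default_policy": {"learner_stats": {"policy_loss": 0.3, "vf_loss": 0.5}}})
print(builder.finalize())  # Averaged stats for default_policy: policy_loss=0.4, vf_loss=1.0.
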
Example #4
    def process_trained_results(self) -> ResultDict:
        # Get learner outputs/stats from output queue.
        learner_infos = []
        num_env_steps_trained = 0
        num_agent_steps_trained = 0

        # Loop through output queue and update our counts.
        for _ in range(self._learner_thread.outqueue.qsize()):
            if self._learner_thread.is_alive():
                (
                    env_steps,
                    agent_steps,
                    learner_results,
                ) = self._learner_thread.outqueue.get(timeout=0.001)
                num_env_steps_trained += env_steps
                num_agent_steps_trained += agent_steps
                if learner_results:
                    learner_infos.append(learner_results)
            else:
                raise RuntimeError("The learner thread died while training")
        # Nothing new happened since last time, use the same learner stats.
        if not learner_infos:
            final_learner_info = copy.deepcopy(
                self._learner_thread.learner_info)
        # Accumulate learner stats using the `LearnerInfoBuilder` utility.
        else:
            builder = LearnerInfoBuilder()
            for info in learner_infos:
                builder.add_learn_on_batch_results_multi_agent(info)
            final_learner_info = builder.finalize()

        # Update the steps trained counters.
        self._counters[NUM_ENV_STEPS_TRAINED] += num_env_steps_trained
        self._counters[NUM_AGENT_STEPS_TRAINED] += num_agent_steps_trained

        return final_learner_info
Example #5
    def training_iteration(self) -> ResultDict:
        # Shortcut.
        local_worker = self.workers.local_worker()

        # Define the function executed in parallel by all RolloutWorkers to collect
        # samples + compute and return gradients (and other information).

        def sample_and_compute_grads(worker: RolloutWorker) -> Dict[str, Any]:
            """Call sample() and compute_gradients() remotely on workers."""
            samples = worker.sample()
            grads, infos = worker.compute_gradients(samples)
            return {
                "grads": grads,
                "infos": infos,
                "agent_steps": samples.agent_steps(),
                "env_steps": samples.env_steps(),
            }

        # Perform rollouts and gradient calculations asynchronously.
        with self._timers[GRAD_WAIT_TIMER]:
            # Results are a mapping from ActorHandle (RolloutWorker) to their
            # returned gradient calculation results.
            async_results: Dict[
                ActorHandle, Dict] = asynchronous_parallel_requests(
                    remote_requests_in_flight=self.remote_requests_in_flight,
                    actors=self.workers.remote_workers(),
                    ray_wait_timeout_s=0.0,
                    max_remote_requests_in_flight_per_actor=1,
                    remote_fn=sample_and_compute_grads,
                )

        # Loop through all fetched worker-computed gradients (if any)
        # and apply them - one by one - to the local worker's model.
        # After each apply step (one step per worker that returned some gradients),
        # update that particular worker's weights.
        global_vars = None
        learner_info_builder = LearnerInfoBuilder(num_devices=1)
        for worker, result in async_results.items():
            # Apply gradients to local worker.
            with self._timers[APPLY_GRADS_TIMER]:
                local_worker.apply_gradients(result["grads"])
            self._timers[APPLY_GRADS_TIMER].push_units_processed(
                result["agent_steps"])

            # Update all step counters.
            self._counters[NUM_AGENT_STEPS_SAMPLED] += result["agent_steps"]
            self._counters[NUM_ENV_STEPS_SAMPLED] += result["env_steps"]
            self._counters[NUM_AGENT_STEPS_TRAINED] += result["agent_steps"]
            self._counters[NUM_ENV_STEPS_TRAINED] += result["env_steps"]

            # Create current global vars.
            global_vars = {
                "timestep": self._counters[NUM_AGENT_STEPS_SAMPLED],
            }

            # Synch updated weights back to the particular worker.
            with self._timers[SYNCH_WORKER_WEIGHTS_TIMER]:
                weights = local_worker.get_weights(
                    local_worker.get_policies_to_train())
                worker.set_weights.remote(weights, global_vars)

            learner_info_builder.add_learn_on_batch_results_multi_agent(
                result["infos"])

        # Update global vars of the local worker.
        if global_vars:
            local_worker.set_global_vars(global_vars)

        return learner_info_builder.finalize()
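
Example #5 is the asynchronous-gradients pattern: each worker computes gradients from its own (possibly stale) copy of the weights, and the driver applies every returned gradient to the local model and immediately ships the updated weights back to just that worker. A simplified sketch of that apply-then-sync loop with plain numpy and threads (no Ray, no real policies; compute_grads just fabricates a gradient, and each round's gradients are computed in parallel, then applied one by one):

import numpy as np
from concurrent.futures import ThreadPoolExecutor


def compute_grads(weights, seed):
    """Stand-in for worker.sample() + worker.compute_gradients()."""
    rng = np.random.default_rng(seed)
    # Fake "gradient" that nudges the weights toward zero, plus some noise.
    grads = -0.1 * weights + 0.01 * rng.standard_normal(weights.shape)
    return {"grads": grads, "agent_steps": 200, "env_steps": 200}


local_weights = np.ones(4)  # The local worker's parameters.
worker_weights = {i: local_weights.copy() for i in range(3)}  # Per-worker (stale) copies.

with ThreadPoolExecutor(max_workers=3) as pool:
    for step in range(5):
        # Each "worker" computes gradients from its own copy of the weights, in parallel.
        futures = {
            i: pool.submit(compute_grads, worker_weights[i], step * 10 + i)
            for i in worker_weights
        }
        for i, fut in futures.items():
            result = fut.result()
            # Apply the (possibly stale) gradients to the local weights ...
            local_weights += result["grads"]
            # ... then sync the updated weights back to just that worker.
            worker_weights[i] = local_weights.copy()

print(local_weights)  # Weights have decayed toward zero over the 5 rounds.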