Example #1
    def _create_generators(
        self,
        make_sampler_fn: Callable[..., TaskSampler],
        sampler_fn_args: Sequence[Dict[str, Any]],
    ) -> List[Generator]:

        generators = []
        for id, current_sampler_fn_args in enumerate(sampler_fn_args):
            if self.should_log:
                get_logger().info(
                    "Starting {}-th SingleProcessVectorSampledTasks generator with args {}"
                    .format(id, current_sampler_fn_args))
            generators.append(
                self._task_sampling_loop_generator_fn(
                    worker_id=id,
                    make_sampler_fn=make_sampler_fn,
                    sampler_fn_args=current_sampler_fn_args,
                    auto_resample_when_done=self._auto_resample_when_done,
                    should_log=self.should_log,
                ))

            if next(generators[-1]) != "started":
                raise RuntimeError("Generator failed to start.")

        return generators
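# Hedged sketch, not the actual _task_sampling_loop_generator_fn: it only illustrates the
# handshake that `next(generators[-1]) != "started"` checks above. The generator builds its
# sampler, yields the literal string "started" once, and then waits for (command, data)
# pairs sent by the caller. All names and the command protocol here are assumptions.
def _toy_task_sampling_loop_generator_fn(
    worker_id, make_sampler_fn, sampler_fn_args, auto_resample_when_done, should_log
):
    task_sampler = make_sampler_fn(**sampler_fn_args)
    command, data = yield "started"  # the first next() returns this handshake token
    while command != "close":
        # ... handle `command` (e.g. sample and step the next task) and yield its result ...
        command, data = yield None
    task_sampler.close()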
Example #2
    def process_eval_package(
        self, log_writer, pkg, all_results: Optional[List[Any]] = None
    ):
        pkg_type, payload, steps = pkg
        metrics_pkg, task_outputs, render, checkpoint_file_name = payload

        metrics_type, metrics_payload, num_tasks = metrics_pkg

        mode = pkg_type.split("_")[0]

        metrics = OrderedDict(
            sorted(
                [(k, v) for k, v in metrics_payload.items() if k != "task_info"],
                key=lambda x: x[0],
            )
        )

        if all_results is not None:
            results = copy.deepcopy(metrics)
            results.update({"training_steps": steps, "tasks": task_outputs})
            all_results.append(results)

        message = ["{} {} steps:".format(mode, steps)]
        for k in metrics:
            log_writer.add_scalar("{}/".format(mode) + k, metrics[k], steps)
            message.append(k + " {}".format(metrics[k]))
        message.append("tasks {} checkpoint {}".format(num_tasks, checkpoint_file_name))
        get_logger().info(" ".join(message))

        # if render is not None:
        #     log_writer.add_vid("{}/agent_view".format(mode), render, steps)

        if self.visualizer is not None:
            self.visualizer.log(log_writer, task_outputs, render, steps)
Example #3
    def log(
        self,
        log_writer: SummaryWriter,
        task_outputs: Optional[List[Any]],
        render: Optional[Dict[str, List[Dict[str, Any]]]],
        num_steps: int,
    ):
        viz_order, all_episodes = self._auto_viz_order(task_outputs)
        if viz_order is None:
            get_logger().debug("trajectory viz returning without visualizing")
            return

        for page, current_ids in enumerate(viz_order):
            figs = []
            for episode_id in current_ids:
                # assert episode_id in all_episodes
                if episode_id not in all_episodes:
                    get_logger().warning(
                        "skipping viz for missing episode {}".format(
                            episode_id))
                    continue
                figs.append(self.make_fig(all_episodes[episode_id],
                                          episode_id))
            if len(figs) == 0:
                continue
            log_writer.add_figure(
                "{}/{}_group{}".format(self.mode, self.label, page),
                figs,
                global_step=num_steps,
            )
            plt.close(
                "all"
            )  # close all current figures (SummaryWriter already closes all figures we log)
Example #4
    def find_distance(
        self,
        position: Dict[str, Any],
        target: Union[Dict[str, Any], str],
        native_distance_function: Callable[
            [Dict[str, Any], Union[Dict[str, Any], str]], float],
    ) -> float:
        # Convert the position to its rounded string representation
        position_str = self._pos_to_str(position)
        # If the target is also a position, convert it to its rounded string representation
        if isinstance(target, str):
            target_str = target
        else:
            target_str = self._pos_to_str(target)

        if position_str not in self.cache:
            self.cache[position_str] = {}
        if target_str not in self.cache[position_str]:
            self.cache[position_str][target_str] = native_distance_function(
                position, target)
            self.misses += 1
        else:
            self.hits += 1
        self.num_accesses += 1
        if self.num_accesses % 1000 == 0:
            get_logger().debug("Cache Miss-Hit Ratio: %.4f" %
                               (self.misses / max(self.hits, 1)))  # avoid division by zero before the first hit
        return self.cache[position_str][target_str]
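    # Hedged sketch: _pos_to_str is not shown above; presumably it rounds the continuous
    # coordinates so that nearby positions map to the same cache key. Something along these
    # lines (the key format and rounding precision are assumptions):
    def _pos_to_str_sketch(self, position: Dict[str, Any], ndigits: int = 2) -> str:
        # {"x": 1.2345, "y": 0.901, "z": -3.0} -> "1.23_0.9_-3.0"
        return "_".join(str(round(float(position[k]), ndigits)) for k in ("x", "y", "z"))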
Example #5
    def save_project_state(self):
        base_dir = os.path.join(
            self.output_dir,
            "used_configs",
            self.config.tag() if self.extra_tag == "" else os.path.join(
                self.config.tag(), self.extra_tag),
            self.local_start_time_str,
        )
        os.makedirs(base_dir, exist_ok=True)

        # Saving current git diff
        sha, diff_str = get_git_diff_of_project()
        with open(os.path.join(base_dir, "{}.patch".format(sha)), "w") as f:
            f.write(diff_str)

        get_logger().info("Git diff saved to {}".format(base_dir))

        # Recursively saving configs
        if self.loaded_config_src_files is not None:
            for file in self.loaded_config_src_files:
                base, module = self.loaded_config_src_files[file]
                parts = module.split(".")

                src_file = os.path.sep.join([base] + parts) + ".py"
                assert os.path.isfile(
                    src_file), "Config file {} not found".format(src_file)

                dst_file = os.path.join(
                    base_dir,
                    os.path.join(*parts[1:]),
                ) + ".py"
                os.makedirs(os.path.dirname(dst_file), exist_ok=True)
                shutil.copy(src_file, dst_file)

        get_logger().info("Config files saved to {}".format(base_dir))
Example #6
    def narrow(self):
        assert len(self.unnarrow_data) == 0, "attempting to narrow narrowed rollouts"

        if self.step == 0:  # we're actually done
            get_logger().warning("Called narrow with self.step == 0")
            return

        for storage_name in ["observations", "memory"]:
            storage: Memory = getattr(self, storage_name)
            for key in storage:
                self.unnarrow_data[storage_name][key] = storage.tensor(key)
                storage[key] = (
                    storage.tensor(key).narrow(dim=0, start=0, length=self.step + 1),
                    storage.sampler_dim(key),
                )

        for name in ["prev_actions", "value_preds", "returns", "masks"]:
            self.unnarrow_data[name] = getattr(self, name)
            setattr(
                self,
                name,
                self.unnarrow_data[name].narrow(dim=0, start=0, length=self.step + 1),
            )

        for name in ["actions", "action_log_probs", "rewards"]:
            self.unnarrow_data[name] = getattr(self, name)
            setattr(
                self,
                name,
                self.unnarrow_data[name].narrow(dim=0, start=0, length=self.step),
            )

        self.unnarrow_data["num_steps"] = self.num_steps
        self.num_steps = self.step
        self.step = 0  # we just finished a rollout, so we reset it for the next one
Example #7
def clips_to_video(clips, h, w, c):
    # encode sequence of images into gif string
    clip = concatenate_videoclips(clips)

    filename = tempfile.NamedTemporaryFile(suffix=".gif", delete=False).name

    # With moviepy >= 1.0.0, passing logger=None suppresses the progress output.
    try:
        clip.write_gif(filename, verbose=False, logger=None)
    except TypeError:
        get_logger().warning(
            "Upgrade to moviepy >= 1.0.0 to suppress the progress bar.")
        clip.write_gif(filename, verbose=False)

    with open(filename, "rb") as f:
        tensor_string = f.read()

    try:
        os.remove(filename)
    except OSError:
        get_logger().warning(
            "The temporary file used by moviepy cannot be deleted.")

    return TBXSummary.Image(height=h,
                            width=w,
                            colorspace=c,
                            encoded_image_string=tensor_string)
Example #8
    def next_task(
            self,
            force_advance_scene: bool = False) -> Optional[ObjectNavTask]:
        if self.max_tasks is not None and self.max_tasks <= 0:
            return None

        if self.episode_index >= len(
                self.episodes[self.scenes[self.scene_index]]):
            self.scene_index = (self.scene_index + 1) % len(self.scenes)
            # shuffle the new list of episodes to train on
            random.shuffle(self.episodes[self.scenes[self.scene_index]])
            self.episode_index = 0
        scene = self.scenes[self.scene_index]
        episode = self.episodes[scene][self.episode_index]
        if self.env is not None:
            if scene.replace("_physics", "") != self.env.scene_name.replace(
                    "_physics", ""):
                self.env.reset(
                    scene_name=scene,
                    filtered_objects=list(
                        set([e["object_id"] for e in self.episodes[scene]])),
                )
        else:
            self.env = self._create_environment()
            self.env.reset(
                scene_name=scene,
                filtered_objects=list(
                    set([e["object_id"] for e in self.episodes[scene]])),
            )
        task_info = {"scene": scene, "object_type": episode["object_type"]}
        if len(task_info) == 0:
            get_logger().warning("Scene {} does not contain any"
                                 " objects of any of the types {}.".format(
                                     scene, self.object_types))
        task_info["initial_position"] = episode["initial_position"]
        task_info["initial_orientation"] = episode["initial_orientation"]
        task_info["distance_to_target"] = episode["shortest_path_length"]
        task_info["path_to_target"] = episode["shortest_path"]
        task_info["object_type"] = episode["object_type"]
        task_info["id"] = episode["id"]
        if self.allow_flipping and random.random() > 0.5:
            task_info["mirrored"] = True
        else:
            task_info["mirrored"] = False

        self.episode_index += 1
        if self.max_tasks is not None:
            self.max_tasks -= 1
        if not self.env.teleport(episode["initial_position"],
                                 episode["initial_orientation"]):
            return self.next_task()
        self._last_sampled_task = ObjectNavTask(
            env=self.env,
            sensors=self.sensors,
            task_info=task_info,
            max_steps=self.max_steps,
            action_space=self._action_space,
            reward_configs=self.rewards_config,
        )
        return self._last_sampled_task
Example #9
    def log(
        self,
        log_writer: SummaryWriter,
        task_outputs: Optional[List[Any]],
        render: Optional[Dict[str, List[Dict[str, Any]]]],
        num_steps: int,
    ):
        if render is None:
            return

        datum_id = self._source_to_str(self.vector_task_sources[0],
                                       is_vector_task=True)
        for page, current_ids in enumerate(self.episode_ids):
            images = []  # list of lists of rgb frames
            for episode_id in current_ids:
                # assert episode_id in render
                if episode_id not in render:
                    get_logger().warning(
                        "skipping viz for missing episode {}".format(
                            episode_id))
                    continue
                # TODO overlay episode id?
                images.append([step[datum_id] for step in render[episode_id]])
            if len(images) == 0:
                continue
            vid = self.make_vid(images)
            if vid is not None:
                log_writer.add_vid(
                    "{}/{}_group{}".format(self.mode, self.label, page),
                    vid,
                    global_step=num_steps,
                )
Example #10
    def judge(self) -> float:
        reward = -0.01

        new_geodesic_distance = self.current_geodesic_dist_to_target()
        if self.last_geodesic_distance is None:
            self.last_geodesic_distance = new_geodesic_distance

        if self.last_geodesic_distance is not None:
            if (new_geodesic_distance is None
                    or new_geodesic_distance in [float("-inf"),
                                                 float("inf")]
                    or np.isnan(new_geodesic_distance)):
                new_geodesic_distance = self.last_geodesic_distance
            delta_distance_reward = self.last_geodesic_distance - new_geodesic_distance
            reward += delta_distance_reward
            self.last_geodesic_distance = new_geodesic_distance

            if self.is_done():
                reward += 10.0 if self._success else self.failed_end_reward
        else:
            get_logger().warning(
                "Could not get geodesic distance from habitat env.")

        self._rewards.append(float(reward))

        return float(reward)
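    # Worked example of the shaping in judge() above (numbers illustrative): with the -0.01
    # step penalty, moving from a geodesic distance of 2.30 to 2.10 gives
    #   reward = -0.01 + (2.30 - 2.10) = 0.19
    # and if that same step ends the episode, +10.0 is added on success (or
    # failed_end_reward on failure).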
Example #11
 def worker_devices(self, mode: str):
     # Note: Avoid instantiating preprocessors in machine_params (use Builder if needed)
     devices = self.config.machine_params(mode)["gpu_ids"]
     if len(devices) > 0:
         if torch.device(devices[0]) == torch.device("cpu"):
             assert all_equal(
                 devices
             ), "Specified devices {} must be all non-negative integers or all equal to 'cpu'".format(
                 devices
             )
             devices = [torch.device(d) for d in devices]
         else:
             assert all(
                 [gpu_id >= 0 for gpu_id in devices]
             ), "all gpu_ids must be >= 0"
             assert torch.cuda.device_count() > max(
                 set(devices)
             ), "{} CUDA devices available for requested {} gpu ids {}".format(
                 torch.cuda.device_count(), mode, devices
             )
     else:
         devices = [torch.device("cpu")]
     get_logger().info(
         "Using {} {} workers on devices {}".format(len(devices), mode, devices)
     )
     return devices
Example #12
    def log(
        self,
        log_writer: SummaryWriter,
        task_outputs: Optional[List[Any]],
        render: Optional[Dict[str, List[Dict[str, Any]]]],
        num_steps: int,
    ):
        if render is None:
            return

        for page, current_ids in enumerate(self.episode_ids):
            figs = []
            for episode_id in current_ids:
                if episode_id not in render or len(render[episode_id]) == 0:
                    get_logger().warning(
                        "skipping viz for missing or 0-length episode {}".
                        format(episode_id))
                    continue
                episode_src = [
                    step[self.datum_id] for step in render[episode_id]
                    if self.datum_id in step
                ]

                figs.append(self.make_fig(episode_src, episode_id))
            if len(figs) == 0:
                continue
            log_writer.add_figure(
                "{}/{}_group{}".format(self.mode, self.label, page),
                figs,
                global_step=num_steps,
            )
            plt.close(
                "all"
            )  # close all current figures (SummaryWriter already closes all figures we log)
Example #13
    def process_eval_package(self, log_writer: SummaryWriter,
                             pkg: LoggingPackage):
        training_steps = pkg.training_steps
        checkpoint_file_name = pkg.checkpoint_file_name
        render = pkg.viz_data
        task_outputs = pkg.metric_dicts

        num_tasks = pkg.num_non_empty_metrics_dicts_added
        metric_means = pkg.metrics_tracker.means()

        mode = pkg.mode

        log_writer.add_scalar(f"{mode}/num_tasks_evaled", num_tasks,
                              training_steps)

        message = [f"{mode} {training_steps} steps:"]
        for k in sorted(metric_means.keys()):
            log_writer.add_scalar(f"{mode}/{k}", metric_means[k],
                                  training_steps)
            message.append(f"{k} {metric_means[k]}")
        message.append(f"tasks {num_tasks} checkpoint {checkpoint_file_name}")
        get_logger().info(" ".join(message))

        if self.visualizer is not None:
            self.visualizer.log(
                log_writer=log_writer,
                task_outputs=task_outputs,
                render=render,
                num_steps=training_steps,
            )
Example #14
    def loss_per_step(  # type: ignore
        self,
        step_count: int,
        batch: ObservationType,
        actor_critic_output: ActorCriticOutput[CategoricalDistr],
    ) -> Dict[str, Tuple[torch.Tensor, Optional[float]]]:
        actions = typing.cast(torch.LongTensor, batch["actions"])
        values = actor_critic_output.values
        action_log_probs = actor_critic_output.distributions.log_probs(actions)

        dist_entropy: torch.FloatTensor = actor_critic_output.distributions.entropy(
        )
        value_loss = 0.5 * (typing.cast(torch.FloatTensor, batch["returns"]) -
                            values).pow(2)

        # TODO: Decided not to use normalized advantages here,
        #   is this correct? (it's how it's done in Kostrikov's)
        action_loss = -(typing.cast(
            torch.FloatTensor, batch["adv_targ"]).detach() * action_log_probs)

        if self.acktr:
            # TODO: Currently acktr doesn't really work because of this natural gradient stuff
            #   that we should figure out how to integrate properly.
            get_logger().warning("acktr is only partially supported.")

        return {
            "value": (value_loss, self.value_loss_coef),
            "action": (action_loss, None),
            "entropy":
            (dist_entropy.mul_(-1.0), self.entropy_coef),  # type: ignore
        }
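    # Hedged sketch: how the (loss, optional weight) pairs returned by loss_per_step are
    # commonly reduced to one scalar, with None meaning a weight of 1.0. The reduction
    # below is an assumption about the surrounding loss class, not its actual code.
    def _reduce_losses_sketch(
        self, losses_per_step: Dict[str, Tuple[torch.Tensor, Optional[float]]]
    ) -> torch.Tensor:
        return sum(
            loss.mean() * (1.0 if weight is None else weight)
            for loss, weight in losses_per_step.values()
        )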
Example #15
    def process_train_packages(self,
                               log_writer,
                               pkgs,
                               last_steps=0,
                               last_time=0.0):
        current_time = time.time()

        pkg_types, payloads, all_steps = [vals for vals in zip(*pkgs)]

        steps = all_steps[0]

        all_info_types = [worker_pkgs for worker_pkgs in zip(*payloads)]

        message = ["train {} steps:".format(steps)]
        for info_type in all_info_types:
            message += self.aggregate_infos(log_writer, info_type, steps)
        message += ["elapsed_time {:.3g}s".format(current_time - last_time)]

        if last_steps > 0:
            fps = (steps - last_steps) / (current_time - last_time)
            message += ["approx_fps {:.3g}".format(fps)]
            log_writer.add_scalar("train/approx_fps", fps, steps)
        get_logger().info(" ".join(message))

        return steps, current_time
Example #16
def create_minigrid_offpolicy_data_iterator(
    path: str,
    nrollouts: int,
    rollout_len: int,
    instr_len: Optional[int],
    restrict_max_steps_in_dataset: Optional[int] = None,
    current_worker: Optional[int] = None,
    num_workers: Optional[int] = None,
) -> ExpertTrajectoryIterator:
    path = os.path.abspath(path)

    assert (current_worker is None) == (
        num_workers is None
    ), "both current_worker and num_workers must be simultaneously defined or undefined"

    if path not in _DATASET_CACHE:
        get_logger().info(
            "Loading minigrid dataset from {} for first time...".format(path))
        _DATASET_CACHE[path] = babyai.utils.load_demos(path)
        assert _DATASET_CACHE[path] is not None and len(
            _DATASET_CACHE[path]) != 0
        get_logger().info(
            "Loading minigrid dataset complete, it contains {} trajectories".
            format(len(_DATASET_CACHE[path])))

    return ExpertTrajectoryIterator(
        data=_DATASET_CACHE[path],
        nrollouts=nrollouts,
        rollout_len=rollout_len,
        instr_len=instr_len,
        restrict_max_steps_in_dataset=restrict_max_steps_in_dataset,
        current_worker=current_worker,
        num_workers=num_workers,
    )
Example #17
    def process_train_packages(
        self,
        log_writer: SummaryWriter,
        pkgs: List[LoggingPackage],
        last_steps=0,
        last_offpolicy_steps=0,
        last_time=0.0,
    ):
        assert self.mode == "train"

        current_time = time.time()

        training_steps = pkgs[0].training_steps
        offpolicy_steps = pkgs[0].off_policy_steps
        log_writer.add_scalar(
            tag="train/pipeline_stage",
            scalar_value=pkgs[0].pipeline_stage,
            global_step=training_steps,
        )

        metrics_and_train_info_tracker = ScalarMeanTracker()
        for pkg in pkgs:
            metrics_and_train_info_tracker.add_scalars(
                scalars=pkg.metrics_tracker.means(),
                n=pkg.metrics_tracker.counts())
            metrics_and_train_info_tracker.add_scalars(
                scalars=pkg.train_info_tracker.means(),
                n=pkg.train_info_tracker.counts(),
            )

        message = [
            "train {} steps {} offpolicy:".format(training_steps,
                                                  offpolicy_steps)
        ]
        means = metrics_and_train_info_tracker.means()
        for k in sorted(means.keys(),
                        key=lambda mean_key: ("/" in mean_key, mean_key)):
            if "offpolicy" not in k:
                log_writer.add_scalar("{}/".format(self.mode) + k, means[k],
                                      training_steps)
            else:
                log_writer.add_scalar(k, means[k], training_steps)
            message.append(k + " {:.3g}".format(means[k]))
        message += ["elapsed_time {:.3g}s".format(current_time - last_time)]

        if last_steps > 0:
            fps = (training_steps - last_steps) / (current_time - last_time)
            message += ["approx_fps {:.3g}".format(fps)]
            log_writer.add_scalar("train/approx_fps", fps, training_steps)

        if last_offpolicy_steps > 0:
            fps = (offpolicy_steps - last_offpolicy_steps) / (current_time -
                                                              last_time)
            message += ["offpolicy/approx_fps {:.3g}".format(fps)]
            log_writer.add_scalar("offpolicy/approx_fps", fps, training_steps)

        get_logger().info(" ".join(message))

        return training_steps, offpolicy_steps, current_time
Example #18
 def checkpoint_start_time_str(checkpoint_file_name):
     parts = checkpoint_file_name.split(os.path.sep)
     assert len(parts) > 1, "{} is not a valid checkpoint path".format(
         checkpoint_file_name)
     start_time_str = parts[-2]
     get_logger().info(
         "Using checkpoint start time {}".format(start_time_str))
     return start_time_str
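 # Usage sketch (the path below is made up): the parent directory name of the checkpoint
 # file is taken to be the experiment's start-time string, e.g.
 #   checkpoint_start_time_str("output/checkpoints/2020-08-13_14-22-52/exp_steps_1000.pt")
 #   -> "2020-08-13_14-22-52"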
Example #19
 def logif(s: Union[str, Exception]):
     if verbose:
         if isinstance(s, str):
             get_logger().info(s)
         elif isinstance(s, Exception):
             get_logger().exception(traceback.format_exc())
         else:
             raise NotImplementedError()
Example #20
    def process_test_packages(
        self,
        log_writer: Optional[SummaryWriter],
        pkgs: List[LoggingPackage],
        all_results: Optional[List[Any]] = None,
    ):
        mode = pkgs[0].mode
        assert mode == "test"

        training_steps = pkgs[0].training_steps

        all_metrics_tracker = ScalarMeanTracker()
        metric_dicts_list, render, checkpoint_file_name = [], {}, []
        for pkg in pkgs:
            all_metrics_tracker.add_scalars(
                scalars=pkg.metrics_tracker.means(),
                n=pkg.metrics_tracker.counts())
            metric_dicts_list.extend(pkg.metric_dicts)
            if pkg.viz_data is not None:
                render.update(pkg.viz_data)
            checkpoint_file_name.append(pkg.checkpoint_file_name)

        assert all_equal(checkpoint_file_name)

        message = [f"{mode} {training_steps} steps:"]

        metric_means = all_metrics_tracker.means()
        for k in sorted(metric_means.keys()):
            if log_writer is not None:
                log_writer.add_scalar(f"{mode}/{k}", metric_means[k],
                                      training_steps)
            message.append(k + " {:.3g}".format(metric_means[k]))

        if all_results is not None:
            results = copy.deepcopy(metric_means)
            results.update({
                "training_steps": training_steps,
                "tasks": metric_dicts_list
            })
            all_results.append(results)

        num_tasks = sum(
            [pkg.num_non_empty_metrics_dicts_added for pkg in pkgs])
        if log_writer is not None:
            log_writer.add_scalar(f"{mode}/num_tasks_evaled", num_tasks,
                                  training_steps)

        message.append("tasks {} checkpoint {}".format(
            num_tasks, checkpoint_file_name[0]))
        get_logger().info(" ".join(message))

        if self.visualizer is not None:
            self.visualizer.log(
                log_writer=log_writer,
                task_outputs=metric_dicts_list,
                render=render,
                num_steps=training_steps,
            )
Example #21
    def test_loop(id: int = 0, *engine_args, **engine_kwargs):
        OnPolicyRunner.init_process("Test", id)
        engine_kwargs["mode"] = "test"
        engine_kwargs["worker_id"] = id
        get_logger().info("test {} args {}".format(id, engine_kwargs))

        test = OnPolicyRunner.init_worker(OnPolicyInference, engine_args, engine_kwargs)
        if test is not None:
            test.process_checkpoints()  # gets checkpoints via queue
Example #22
    def __init__(
        self,
        env: RoboThorEnvironment,
        sensors: List[Sensor],
        task_info: Dict[str, Any],
        max_steps: int,
        reward_configs: Dict[str, Any],
        distance_cache: Optional[Dict[str, Any]] = None,
        episode_info: Optional[Dict[str, Any]] = None,
        **kwargs
    ) -> None:
        super().__init__(
            env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs
        )
        self.reward_configs = reward_configs
        self._took_end_action: bool = False
        self._success: Optional[bool] = False
        self.distance_cache = distance_cache

        if episode_info:
            self.episode_optimal_corners = episode_info["shortest_path"]
            dist = episode_info["shortest_path_length"]
        else:
            self.episode_optimal_corners = self.env.path_corners(
                task_info["target"]
            )  # assume it's valid (sampler must take care)!
            dist = self.env.path_corners_to_dist(self.episode_optimal_corners)
        if dist == float("inf"):
            dist = -1.0  # -1.0 for unreachable
            get_logger().warning(
                "No path for {} from {} to {}".format(
                    self.env.scene_name, self.env.agent_state(), task_info["target"]
                )
            )

        if self.distance_cache:
            self.last_geodesic_distance = get_distance(
                self.distance_cache, self.env.agent_state(), self.task_info["target"]
            )
        else:
            self.last_geodesic_distance = self.env.dist_to_point(
                self.task_info["target"]
            )

        self.optimal_distance = self.last_geodesic_distance
        self._rewards: List[float] = []
        self._distance_to_goal: List[float] = []
        self._metrics = None
        self.path: List[Any] = (
            []
        )  # the initial coordinate will be directly taken from the optimal path

        self.task_info["followed_path"] = [self.env.agent_state()]
        self.task_info["action_names"] = self.action_names()
        self.num_moves_made = 0
Example #23
    def valid_loop(id: int = 0, *engine_args, **engine_kwargs):
        OnPolicyRunner.init_process("Valid", id)
        engine_kwargs["mode"] = "valid"
        engine_kwargs["worker_id"] = id
        get_logger().info("valid {} args {}".format(id, engine_kwargs))

        valid = OnPolicyRunner.init_worker(engine_class=OnPolicyInference,
                                           args=engine_args,
                                           kwargs=engine_kwargs)
        if valid is not None:
            valid.process_checkpoints()  # gets checkpoints via queue
Example #24
    def _spawn_workers(
        self,
        make_sampler_fn: Callable[..., TaskSampler],
        sampler_fn_args_list: Sequence[Sequence[Dict[str, Any]]],
    ) -> Tuple[List[Callable[[], Any]], List[Callable[[Any], None]]]:
        parent_connections, worker_connections = zip(
            *[self._mp_ctx.Pipe(duplex=True) for _ in range(self._num_processes)]
        )
        self._workers = []
        k = 0
        id: Union[int, str]
        for id, stuff in enumerate(
            zip(worker_connections, parent_connections, sampler_fn_args_list)
        ):
            worker_conn, parent_conn, current_sampler_fn_args_list = stuff  # type: ignore

            if len(current_sampler_fn_args_list) != 1:
                id = "{}({}-{})".format(
                    id, k, k + len(current_sampler_fn_args_list) - 1
                )
                k += len(current_sampler_fn_args_list)

            if self.should_log:
                get_logger().info(
                    "Starting {}-th VectorSampledTask worker with args {}".format(
                        id, current_sampler_fn_args_list
                    )
                )
            ps = self._mp_ctx.Process(  # type: ignore
                target=self._task_sampling_loop_worker,
                args=(
                    id,
                    worker_conn.recv,
                    worker_conn.send,
                    make_sampler_fn,
                    current_sampler_fn_args_list,
                    self._auto_resample_when_done,
                    self.metrics_out_queue,
                    self.should_log,
                    worker_conn,
                    parent_conn,
                ),
            )
            self._workers.append(ps)
            ps.daemon = True
            ps.start()
            worker_conn.close()
            time.sleep(
                0.1
            )  # Useful to ensure things don't lock up when spawning many envs
        return (
            [p.recv for p in parent_connections],
            [p.send for p in parent_connections],
        )
Example #25
    def _is_goal_in_range(self) -> Optional[bool]:
        tget = self.task_info["target"]
        dist = self.dist_to_target()

        if -0.5 < dist <= 0.2:
            return True
        elif dist > 0.2:
            return False
        else:
            get_logger().debug("No path for {} from {} to {}".format(
                self.env.scene_name, self.env.agent_state(), tget))
            return None
Example #26
 def retry_dist(position: Dict[str, float], target: Dict[str, float]):
     d = self.distance_from_point_to_point(position, target, 0.05)
     if d < 0:
         get_logger().warning(
             f"Could not find a path from {position} to {target} with 0.05 error tolerance."
             f" Increasing this tolerance to 0.1 any trying again.")
         d = self.distance_from_point_to_point(position, target, 0.1)
         if d < 0:
             get_logger().warning(
                 f"Could not find a path from {position} to {target} with 0.1 error tolerance."
                 f" Returning a distance of -1.")
     return d
Example #27
    def worker_devices(self, mode: str):
        machine_params: MachineParams = MachineParams.instance_from(
            self.config.machine_params(mode))
        devices = machine_params.devices

        assert all_equal(devices) or all(
            d.index >= 0 for d in devices
        ), f"Cannot have a mix of CPU and GPU devices (`devices == {devices}`)"

        get_logger().info("Using {} {} workers on devices {}".format(
            len(devices), mode, devices))
        return devices
Example #28
    def __init__(self,
                 world_dim: int,
                 world_radius: int,
                 sensors: Union[SensorSuite, List[Sensor]],
                 max_steps: int,
                 max_tasks: Optional[int] = None,
                 num_unique_seeds: Optional[int] = None,
                 task_seeds_list: Optional[List[int]] = None,
                 deterministic_sampling: bool = False,
                 seed: Optional[int] = None,
                 **kwargs):
        self.env = LightHouseEnvironment(world_dim=world_dim,
                                         world_radius=world_radius)

        self._last_sampled_task: Optional[FindGoalLightHouseTask] = None
        self.sensors = (SensorSuite(sensors)
                        if not isinstance(sensors, SensorSuite) else sensors)
        self.max_steps = max_steps
        self.max_tasks = max_tasks
        self.num_tasks_generated = 0
        self.deterministic_sampling = deterministic_sampling

        self.num_unique_seeds = num_unique_seeds
        self.task_seeds_list = task_seeds_list
        assert (self.num_unique_seeds is
                None) or (0 < self.num_unique_seeds
                          ), "`num_unique_seeds` must be a positive integer."

        if self.task_seeds_list is not None:
            if self.num_unique_seeds is not None:
                assert self.num_unique_seeds == len(
                    self.task_seeds_list
                ), "`num_unique_seeds` must equal the length of `task_seeds_list` if both specified."
            self.num_unique_seeds = len(self.task_seeds_list)
        elif self.num_unique_seeds is not None:
            self.task_seeds_list = list(range(self.num_unique_seeds))

        assert (not deterministic_sampling) or (
            self.num_unique_seeds is not None
        ), "Cannot use deterministic sampling when `num_unique_seeds` is `None`."

        if (not deterministic_sampling) and self.max_tasks:
            get_logger().warning(
                "`deterministic_sampling` is `False` but you have specified `max_tasks < inf`,"
                " this might be a mistake when running testing.")

        self.seed: int = int(
            seed if seed is not None else np.random.randint(0, 2**31 - 1))
        self.np_seeded_random_gen: Optional[np.random.RandomState] = None
        self.set_seed(self.seed)
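        # Worked example of the seed bookkeeping above: task_seeds_list=[3, 7, 11] forces
        # num_unique_seeds == 3, while num_unique_seeds=3 with no task_seeds_list yields
        # task_seeds_list == [0, 1, 2]; deterministic_sampling additionally requires that
        # num_unique_seeds ends up non-None.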
Example #29
    def dump_top_down_view(self, room_name: str, image_path: str):
        get_logger().debug("Dumping {}".format(image_path))

        self.controller.reset(room_name)
        self.controller.step({
            "action": "Initialize",
            "gridSize": 0.1,
            "makeAgentsVisible": False
        })
        self.controller.step({"action": "ToggleMapView"})
        top_down_view = self.controller.last_event.cv2img

        cv2.imwrite(image_path, top_down_view)
Example #30
    def __init__(
        self,
        env_builder: Union[str, Callable[..., MiniGridEnv]],
        sensors: Union[SensorSuite, List[Sensor]],
        max_tasks: Optional[int] = None,
        num_unique_seeds: Optional[int] = None,
        task_seeds_list: Optional[List[int]] = None,
        deterministic_sampling: bool = False,
        extra_task_kwargs: Optional[Dict] = None,
        **kwargs,
    ):
        super(BabyAITaskSampler, self).__init__()
        self.sensors = (SensorSuite(sensors)
                        if not isinstance(sensors, SensorSuite) else sensors)
        self.max_tasks = max_tasks
        self.num_unique_seeds = num_unique_seeds
        self.deterministic_sampling = deterministic_sampling
        self.extra_task_kwargs = (extra_task_kwargs
                                  if extra_task_kwargs is not None else {})

        self._last_env_seed: Optional[int] = None
        self._last_task: Optional[BabyAITask] = None

        assert (self.num_unique_seeds is
                None) or (0 < self.num_unique_seeds
                          ), "`num_unique_seeds` must be a positive integer."

        self.num_unique_seeds = num_unique_seeds
        self.task_seeds_list = task_seeds_list
        if self.task_seeds_list is not None:
            if self.num_unique_seeds is not None:
                assert self.num_unique_seeds == len(
                    self.task_seeds_list
                ), "`num_unique_seeds` must equal the length of `task_seeds_list` if both specified."
            self.num_unique_seeds = len(self.task_seeds_list)
        elif self.num_unique_seeds is not None:
            self.task_seeds_list = list(range(self.num_unique_seeds))

        if (not deterministic_sampling) and self.max_tasks:
            get_logger().warning(
                "`deterministic_sampling` is `False` but you have specified `max_tasks < inf`,"
                " this might be a mistake when running testing.")

        if isinstance(env_builder, str):
            self.env = gym.make(env_builder)
        else:
            self.env = env_builder()

        self.np_seeded_random_gen, _ = seeding.np_random(
            random.randint(0, 2**31 - 1))
        self.num_tasks_generated = 0