def _create_generators(
    self,
    make_sampler_fn: Callable[..., TaskSampler],
    sampler_fn_args: Sequence[Dict[str, Any]],
) -> List[Generator]:
    generators = []
    for id, current_sampler_fn_args in enumerate(sampler_fn_args):
        if self.should_log:
            get_logger().info(
                "Starting {}-th SingleProcessVectorSampledTasks generator with args {}".format(
                    id, current_sampler_fn_args
                )
            )
        generators.append(
            self._task_sampling_loop_generator_fn(
                worker_id=id,
                make_sampler_fn=make_sampler_fn,
                sampler_fn_args=current_sampler_fn_args,
                auto_resample_when_done=self._auto_resample_when_done,
                should_log=self.should_log,
            )
        )
        if next(generators[-1]) != "started":
            raise RuntimeError("Generator failed to start.")
    return generators
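# A minimal, self-contained sketch of the "started" handshake used above
# (`_dummy_sampling_loop` is a hypothetical stand-in, not part of this module):
# the generator yields a sentinel before doing any real work, so the caller
# can fail fast if startup raised.
def _dummy_sampling_loop(worker_id: int):
    # Any setup errors surface on the first next() call.
    yield "started"
    step = 0
    while True:
        yield ("sample", worker_id, step)
        step += 1


gen = _dummy_sampling_loop(worker_id=0)
if next(gen) != "started":
    raise RuntimeError("Generator failed to start.")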
def process_eval_package(
    self, log_writer, pkg, all_results: Optional[List[Any]] = None
):
    pkg_type, payload, steps = pkg
    metrics_pkg, task_outputs, render, checkpoint_file_name = payload
    metrics_type, metrics_payload, num_tasks = metrics_pkg
    mode = pkg_type.split("_")[0]

    metrics = OrderedDict(
        sorted(
            [(k, v) for k, v in metrics_payload.items() if k != "task_info"],
            key=lambda x: x[0],
        )
    )

    if all_results is not None:
        results = copy.deepcopy(metrics)
        results.update({"training_steps": steps, "tasks": task_outputs})
        all_results.append(results)

    message = ["{} {} steps:".format(mode, steps)]
    for k in metrics:
        log_writer.add_scalar("{}/".format(mode) + k, metrics[k], steps)
        message.append(k + " {}".format(metrics[k]))
    message.append("tasks {} checkpoint {}".format(num_tasks, checkpoint_file_name))
    get_logger().info(" ".join(message))

    # if render is not None:
    #     log_writer.add_vid("{}/agent_view".format(mode), render, steps)

    if self.visualizer is not None:
        self.visualizer.log(log_writer, task_outputs, render, steps)
def log(
    self,
    log_writer: SummaryWriter,
    task_outputs: Optional[List[Any]],
    render: Optional[Dict[str, List[Dict[str, Any]]]],
    num_steps: int,
):
    viz_order, all_episodes = self._auto_viz_order(task_outputs)
    if viz_order is None:
        get_logger().debug("trajectory viz returning without visualizing")
        return
    for page, current_ids in enumerate(viz_order):
        figs = []
        for episode_id in current_ids:
            # assert episode_id in all_episodes
            if episode_id not in all_episodes:
                get_logger().warning(
                    "skipping viz for missing episode {}".format(episode_id)
                )
                continue
            figs.append(self.make_fig(all_episodes[episode_id], episode_id))
        if len(figs) == 0:
            continue
        log_writer.add_figure(
            "{}/{}_group{}".format(self.mode, self.label, page),
            figs,
            global_step=num_steps,
        )
        plt.close("all")  # close all current figures (SummaryWriter already closes all figures we log)
def find_distance(
    self,
    position: Dict[str, Any],
    target: Union[Dict[str, Any], str],
    native_distance_function: Callable[
        [Dict[str, Any], Union[Dict[str, Any], str]], float
    ],
) -> float:
    # Convert the position to its rounded string representation
    position_str = self._pos_to_str(position)
    # If the target is a string, it is assumed to already be a rounded
    # string representation; otherwise convert it as well.
    if isinstance(target, str):
        target_str = target
    else:
        target_str = self._pos_to_str(target)

    if position_str not in self.cache:
        self.cache[position_str] = {}
    if target_str not in self.cache[position_str]:
        self.cache[position_str][target_str] = native_distance_function(
            position, target
        )
        self.misses += 1
    else:
        self.hits += 1
    self.num_accesses += 1
    if self.num_accesses % 1000 == 0:
        # Guard against division by zero when there have been no hits yet.
        get_logger().debug(
            "Cache Miss-Hit Ratio: %.4f" % (self.misses / max(self.hits, 1))
        )
    return self.cache[position_str][target_str]
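# A minimal, self-contained analogue of the memoization above (names here are
# illustrative, not the module's): repeated queries for the same rounded key
# pair hit the nested dict instead of re-invoking the distance function.
import math

_cache = {}


def cached_distance(p, q, native_fn):
    p_str, q_str = str(sorted(p.items())), str(sorted(q.items()))
    if q_str not in _cache.setdefault(p_str, {}):
        _cache[p_str][q_str] = native_fn(p, q)  # miss: compute and store
    return _cache[p_str][q_str]


d = cached_distance(
    {"x": 0.0, "z": 0.0},
    {"x": 3.0, "z": 4.0},
    lambda p, q: math.hypot(q["x"] - p["x"], q["z"] - p["z"]),
)
assert d == 5.0  # a second identical call would be a cache hit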
def save_project_state(self):
    base_dir = os.path.join(
        self.output_dir,
        "used_configs",
        self.config.tag()
        if self.extra_tag == ""
        else os.path.join(self.config.tag(), self.extra_tag),
        self.local_start_time_str,
    )
    os.makedirs(base_dir, exist_ok=True)

    # Saving current git diff
    sha, diff_str = get_git_diff_of_project()
    with open(os.path.join(base_dir, "{}.patch".format(sha)), "w") as f:
        f.write(diff_str)

    get_logger().info("Git diff saved to {}".format(base_dir))

    # Recursively saving configs
    if self.loaded_config_src_files is not None:
        for file in self.loaded_config_src_files:
            base, module = self.loaded_config_src_files[file]
            parts = module.split(".")

            src_file = os.path.sep.join([base] + parts) + ".py"
            assert os.path.isfile(src_file), "Config file {} not found".format(
                src_file
            )

            dst_file = os.path.join(base_dir, os.path.join(*parts[1:])) + ".py"
            os.makedirs(os.path.dirname(dst_file), exist_ok=True)
            shutil.copy(src_file, dst_file)

    get_logger().info("Config files saved to {}".format(base_dir))
def narrow(self):
    assert len(self.unnarrow_data) == 0, "attempting to narrow narrowed rollouts"
    if self.step == 0:  # we're actually done
        get_logger().warning("Called narrow with self.step == 0")
        return

    # `unnarrow_data` is assumed to be a `defaultdict(dict)`, so the nested
    # assignments below do not raise a KeyError.
    for storage_name in ["observations", "memory"]:
        storage: Memory = getattr(self, storage_name)
        for key in storage:
            self.unnarrow_data[storage_name][key] = storage.tensor(key)
            storage[key] = (
                storage.tensor(key).narrow(dim=0, start=0, length=self.step + 1),
                storage.sampler_dim(key),
            )

    for name in ["prev_actions", "value_preds", "returns", "masks"]:
        self.unnarrow_data[name] = getattr(self, name)
        setattr(
            self,
            name,
            self.unnarrow_data[name].narrow(dim=0, start=0, length=self.step + 1),
        )

    for name in ["actions", "action_log_probs", "rewards"]:
        self.unnarrow_data[name] = getattr(self, name)
        setattr(
            self,
            name,
            self.unnarrow_data[name].narrow(dim=0, start=0, length=self.step),
        )

    self.unnarrow_data["num_steps"] = self.num_steps
    self.num_steps = self.step
    self.step = 0  # we just finished a rollout, so we reset it for the next one
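# A short, self-contained illustration of the `Tensor.narrow` calls above:
# narrowing dim 0 keeps a length-`length` view starting at `start`, sharing
# storage with the original tensor (no copy), which is why the originals are
# stashed in `unnarrow_data` for later restoration.
import torch

full = torch.arange(10).unsqueeze(1)  # shape [10, 1], a 10-step buffer
first_four = full.narrow(dim=0, start=0, length=4)
assert first_four.shape == (4, 1)
assert first_four.data_ptr() == full.data_ptr()  # a view, not a copy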
def clips_to_video(clips, h, w, c):
    # encode sequence of images into gif string
    clip = concatenate_videoclips(clips)

    filename = tempfile.NamedTemporaryFile(suffix=".gif", delete=False).name

    # moviepy >= 1.0.0 use logger=None to suppress output.
    try:
        clip.write_gif(filename, verbose=False, logger=None)
    except TypeError:
        get_logger().warning(
            "Upgrade to moviepy >= 1.0.0 to suppress the progress bar."
        )
        clip.write_gif(filename, verbose=False)

    with open(filename, "rb") as f:
        tensor_string = f.read()

    try:
        os.remove(filename)
    except OSError:
        get_logger().warning(
            "The temporary file used by moviepy cannot be deleted."
        )

    return TBXSummary.Image(
        height=h,
        width=w,
        colorspace=c,
        encoded_image_string=tensor_string,
    )
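# A generic sketch of the version-compatibility pattern used above: attempt
# the call with the newer keyword argument and fall back when an older
# library version rejects it with a TypeError. `render` and `quiet` are
# hypothetical names for illustration only.
def render_with_fallback(render, path):
    try:
        render(path, quiet=True)  # newer API accepts `quiet`
    except TypeError:
        render(path)  # older API: retry without the keyword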
def next_task(
    self, force_advance_scene: bool = False
) -> Optional[ObjectNavTask]:
    if self.max_tasks is not None and self.max_tasks <= 0:
        return None

    if self.episode_index >= len(self.episodes[self.scenes[self.scene_index]]):
        self.scene_index = (self.scene_index + 1) % len(self.scenes)
        # shuffle the new list of episodes to train on
        random.shuffle(self.episodes[self.scenes[self.scene_index]])
        self.episode_index = 0

    scene = self.scenes[self.scene_index]
    episode = self.episodes[scene][self.episode_index]
    if self.env is not None:
        if scene.replace("_physics", "") != self.env.scene_name.replace(
            "_physics", ""
        ):
            self.env.reset(
                scene_name=scene,
                filtered_objects=list(
                    set([e["object_id"] for e in self.episodes[scene]])
                ),
            )
    else:
        self.env = self._create_environment()
        self.env.reset(
            scene_name=scene,
            filtered_objects=list(
                set([e["object_id"] for e in self.episodes[scene]])
            ),
        )

    task_info = {"scene": scene, "object_type": episode["object_type"]}
    if len(task_info) == 0:
        get_logger().warning(
            "Scene {} does not contain any"
            " objects of any of the types {}.".format(scene, self.object_types)
        )

    task_info["initial_position"] = episode["initial_position"]
    task_info["initial_orientation"] = episode["initial_orientation"]
    task_info["distance_to_target"] = episode["shortest_path_length"]
    task_info["path_to_target"] = episode["shortest_path"]
    task_info["object_type"] = episode["object_type"]
    task_info["id"] = episode["id"]
    task_info["mirrored"] = self.allow_flipping and random.random() > 0.5

    self.episode_index += 1
    if self.max_tasks is not None:
        self.max_tasks -= 1
    if not self.env.teleport(
        episode["initial_position"], episode["initial_orientation"]
    ):
        return self.next_task()
    self._last_sampled_task = ObjectNavTask(
        env=self.env,
        sensors=self.sensors,
        task_info=task_info,
        max_steps=self.max_steps,
        action_space=self._action_space,
        reward_configs=self.rewards_config,
    )
    return self._last_sampled_task
def log(
    self,
    log_writer: SummaryWriter,
    task_outputs: Optional[List[Any]],
    render: Optional[Dict[str, List[Dict[str, Any]]]],
    num_steps: int,
):
    if render is None:
        return

    datum_id = self._source_to_str(self.vector_task_sources[0], is_vector_task=True)
    for page, current_ids in enumerate(self.episode_ids):
        images = []  # list of lists of rgb frames
        for episode_id in current_ids:
            # assert episode_id in render
            if episode_id not in render:
                get_logger().warning(
                    "skipping viz for missing episode {}".format(episode_id)
                )
                continue
            # TODO overlay episode id?
            images.append([step[datum_id] for step in render[episode_id]])
        if len(images) == 0:
            continue
        vid = self.make_vid(images)
        if vid is not None:
            log_writer.add_vid(
                "{}/{}_group{}".format(self.mode, self.label, page),
                vid,
                global_step=num_steps,
            )
def judge(self) -> float:
    reward = -0.01  # constant per-step penalty

    new_geodesic_distance = self.current_geodesic_dist_to_target()
    if self.last_geodesic_distance is None:
        self.last_geodesic_distance = new_geodesic_distance

    if self.last_geodesic_distance is not None:
        # Fall back to the previous distance if the new one is invalid.
        if (
            new_geodesic_distance is None
            or new_geodesic_distance in [float("-inf"), float("inf")]
            or np.isnan(new_geodesic_distance)
        ):
            new_geodesic_distance = self.last_geodesic_distance
        delta_distance_reward = self.last_geodesic_distance - new_geodesic_distance
        reward += delta_distance_reward
        self.last_geodesic_distance = new_geodesic_distance

        if self.is_done():
            reward += 10.0 if self._success else self.failed_end_reward
    else:
        get_logger().warning("Could not get geodesic distance from habitat env.")

    self._rewards.append(float(reward))

    return float(reward)
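# A worked example of the distance-delta shaping above (illustrative values):
# with step penalty -0.01, a step that reduces the geodesic distance to the
# goal from 2.0 to 1.5 earns -0.01 + (2.0 - 1.5) = 0.49, while a step that
# increases it to 2.3 would earn -0.01 + (1.5 - 2.3) = -0.81.
last, new = 2.0, 1.5
reward = -0.01 + (last - new)
assert abs(reward - 0.49) < 1e-9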
def worker_devices(self, mode: str):
    # Note: Avoid instantiating preprocessors in machine_params (use Builder if needed)
    devices = self.config.machine_params(mode)["gpu_ids"]
    if len(devices) > 0:
        if torch.device(devices[0]) == torch.device("cpu"):
            assert all_equal(
                devices
            ), "Specified devices {} must be all non-negative integers or all equal to 'cpu'".format(
                devices
            )
            devices = [torch.device(d) for d in devices]
        else:
            assert all(
                [gpu_id >= 0 for gpu_id in devices]
            ), "all gpu_ids must be >= 0"
            assert torch.cuda.device_count() > max(
                set(devices)
            ), "{} CUDA devices available for requested {} gpu ids {}".format(
                torch.cuda.device_count(), mode, devices
            )
    else:
        devices = [torch.device("cpu")]
    get_logger().info(
        "Using {} {} workers on devices {}".format(len(devices), mode, devices)
    )
    return devices
def log(
    self,
    log_writer: SummaryWriter,
    task_outputs: Optional[List[Any]],
    render: Optional[Dict[str, List[Dict[str, Any]]]],
    num_steps: int,
):
    if render is None:
        return

    for page, current_ids in enumerate(self.episode_ids):
        figs = []
        for episode_id in current_ids:
            if episode_id not in render or len(render[episode_id]) == 0:
                get_logger().warning(
                    "skipping viz for missing or 0-length episode {}".format(
                        episode_id
                    )
                )
                continue
            episode_src = [
                step[self.datum_id]
                for step in render[episode_id]
                if self.datum_id in step
            ]
            figs.append(self.make_fig(episode_src, episode_id))
        if len(figs) == 0:
            continue
        log_writer.add_figure(
            "{}/{}_group{}".format(self.mode, self.label, page),
            figs,
            global_step=num_steps,
        )
        plt.close("all")  # close all current figures (SummaryWriter already closes all figures we log)
def process_eval_package(self, log_writer: SummaryWriter, pkg: LoggingPackage):
    training_steps = pkg.training_steps
    checkpoint_file_name = pkg.checkpoint_file_name
    render = pkg.viz_data
    task_outputs = pkg.metric_dicts

    num_tasks = pkg.num_non_empty_metrics_dicts_added
    metric_means = pkg.metrics_tracker.means()

    mode = pkg.mode

    log_writer.add_scalar(f"{mode}/num_tasks_evaled", num_tasks, training_steps)

    message = [f"{mode} {training_steps} steps:"]
    for k in sorted(metric_means.keys()):
        log_writer.add_scalar(f"{mode}/{k}", metric_means[k], training_steps)
        message.append(f"{k} {metric_means[k]}")
    message.append(f"tasks {num_tasks} checkpoint {checkpoint_file_name}")
    get_logger().info(" ".join(message))

    if self.visualizer is not None:
        self.visualizer.log(
            log_writer=log_writer,
            task_outputs=task_outputs,
            render=render,
            num_steps=training_steps,
        )
def loss_per_step(  # type: ignore
    self,
    step_count: int,
    batch: ObservationType,
    actor_critic_output: ActorCriticOutput[CategoricalDistr],
) -> Dict[str, Tuple[torch.Tensor, Optional[float]]]:
    actions = typing.cast(torch.LongTensor, batch["actions"])
    values = actor_critic_output.values
    action_log_probs = actor_critic_output.distributions.log_probs(actions)
    dist_entropy: torch.FloatTensor = actor_critic_output.distributions.entropy()

    value_loss = 0.5 * (
        typing.cast(torch.FloatTensor, batch["returns"]) - values
    ).pow(2)

    # TODO: Decided not to use normalized advantages here,
    #   is this correct? (it's how it's done in Kostrikov's)
    action_loss = -(
        typing.cast(torch.FloatTensor, batch["adv_targ"]).detach()
        * action_log_probs
    )

    if self.acktr:
        # TODO: Currently acktr doesn't really work because of this natural
        #   gradient stuff that we should figure out how to integrate properly.
        get_logger().warning("acktr is only partially supported.")

    return {
        "value": (value_loss, self.value_loss_coef),
        "action": (action_loss, None),
        "entropy": (dist_entropy.mul_(-1.0), self.entropy_coef),  # type: ignore
    }
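# A self-contained sketch of the A2C loss terms above with dummy tensors
# (shapes and values are illustrative, not taken from the module):
import torch

returns = torch.tensor([1.0, 0.5])
values = torch.tensor([0.8, 0.7])
log_probs = torch.tensor([-0.2, -1.1])
advantages = (returns - values).detach()  # detached, as with `adv_targ` above

value_loss = 0.5 * (returns - values).pow(2)  # per-step squared error
action_loss = -(advantages * log_probs)       # policy-gradient surrogate
# Each returned entry pairs a loss tensor with a weight; `None` means 1.0.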
def process_train_packages(self, log_writer, pkgs, last_steps=0, last_time=0.0):
    current_time = time.time()

    pkg_types, payloads, all_steps = [vals for vals in zip(*pkgs)]
    steps = all_steps[0]

    all_info_types = [worker_pkgs for worker_pkgs in zip(*payloads)]

    message = ["train {} steps:".format(steps)]
    for info_type in all_info_types:
        message += self.aggregate_infos(log_writer, info_type, steps)
    message += ["elapsed_time {:.3g}s".format(current_time - last_time)]

    if last_steps > 0:
        fps = (steps - last_steps) / (current_time - last_time)
        message += ["approx_fps {:.3g}".format(fps)]
        log_writer.add_scalar("train/approx_fps", fps, steps)
    get_logger().info(" ".join(message))

    return steps, current_time
def create_minigrid_offpolicy_data_iterator(
    path: str,
    nrollouts: int,
    rollout_len: int,
    instr_len: Optional[int],
    restrict_max_steps_in_dataset: Optional[int] = None,
    current_worker: Optional[int] = None,
    num_workers: Optional[int] = None,
) -> ExpertTrajectoryIterator:
    path = os.path.abspath(path)

    assert (current_worker is None) == (
        num_workers is None
    ), "both current_worker and num_workers must be simultaneously defined or undefined"

    if path not in _DATASET_CACHE:
        get_logger().info(
            "Loading minigrid dataset from {} for first time...".format(path)
        )
        _DATASET_CACHE[path] = babyai.utils.load_demos(path)
        assert _DATASET_CACHE[path] is not None and len(_DATASET_CACHE[path]) != 0
        get_logger().info(
            "Loading minigrid dataset complete, it contains {} trajectories".format(
                len(_DATASET_CACHE[path])
            )
        )
    return ExpertTrajectoryIterator(
        data=_DATASET_CACHE[path],
        nrollouts=nrollouts,
        rollout_len=rollout_len,
        instr_len=instr_len,
        restrict_max_steps_in_dataset=restrict_max_steps_in_dataset,
        current_worker=current_worker,
        num_workers=num_workers,
    )
def process_train_packages(
    self,
    log_writer: SummaryWriter,
    pkgs: List[LoggingPackage],
    last_steps=0,
    last_offpolicy_steps=0,
    last_time=0.0,
):
    assert self.mode == "train"

    current_time = time.time()

    training_steps = pkgs[0].training_steps
    offpolicy_steps = pkgs[0].off_policy_steps
    log_writer.add_scalar(
        tag="train/pipeline_stage",
        scalar_value=pkgs[0].pipeline_stage,
        global_step=training_steps,
    )

    metrics_and_train_info_tracker = ScalarMeanTracker()
    for pkg in pkgs:
        metrics_and_train_info_tracker.add_scalars(
            scalars=pkg.metrics_tracker.means(), n=pkg.metrics_tracker.counts()
        )
        metrics_and_train_info_tracker.add_scalars(
            scalars=pkg.train_info_tracker.means(),
            n=pkg.train_info_tracker.counts(),
        )

    message = ["train {} steps {} offpolicy:".format(training_steps, offpolicy_steps)]
    means = metrics_and_train_info_tracker.means()

    for k in sorted(means.keys(), key=lambda mean_key: ("/" in mean_key, mean_key)):
        if "offpolicy" not in k:
            log_writer.add_scalar("{}/".format(self.mode) + k, means[k], training_steps)
        else:
            log_writer.add_scalar(k, means[k], training_steps)
        message.append(k + " {:.3g}".format(means[k]))

    message += ["elapsed_time {:.3g}s".format(current_time - last_time)]

    if last_steps > 0:
        fps = (training_steps - last_steps) / (current_time - last_time)
        message += ["approx_fps {:.3g}".format(fps)]
        log_writer.add_scalar("train/approx_fps", fps, training_steps)

    if last_offpolicy_steps > 0:
        fps = (offpolicy_steps - last_offpolicy_steps) / (current_time - last_time)
        message += ["offpolicy/approx_fps {:.3g}".format(fps)]
        log_writer.add_scalar("offpolicy/approx_fps", fps, training_steps)

    get_logger().info(" ".join(message))

    return training_steps, offpolicy_steps, current_time
def checkpoint_start_time_str(checkpoint_file_name):
    parts = checkpoint_file_name.split(os.path.sep)
    assert len(parts) > 1, "{} is not a valid checkpoint path".format(
        checkpoint_file_name
    )
    start_time_str = parts[-2]
    get_logger().info("Using checkpoint start time {}".format(start_time_str))
    return start_time_str
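# An illustration of the convention assumed above: the checkpoint's parent
# directory is named after the run's start time, so splitting on the path
# separator and taking the second-to-last component recovers it. The path
# below is a made-up example.
import os

example = os.path.join("experiment_output", "2024-01-01_12-00-00", "exp.pt")
assert example.split(os.path.sep)[-2] == "2024-01-01_12-00-00"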
def logif(s: Union[str, Exception]):
    if verbose:
        if isinstance(s, str):
            get_logger().info(s)
        elif isinstance(s, Exception):
            get_logger().exception(traceback.format_exc())
        else:
            raise NotImplementedError()
def process_test_packages(
    self,
    log_writer: Optional[SummaryWriter],
    pkgs: List[LoggingPackage],
    all_results: Optional[List[Any]] = None,
):
    mode = pkgs[0].mode
    assert mode == "test"

    training_steps = pkgs[0].training_steps

    all_metrics_tracker = ScalarMeanTracker()
    metric_dicts_list, render, checkpoint_file_name = [], {}, []
    for pkg in pkgs:
        all_metrics_tracker.add_scalars(
            scalars=pkg.metrics_tracker.means(), n=pkg.metrics_tracker.counts()
        )
        metric_dicts_list.extend(pkg.metric_dicts)
        if pkg.viz_data is not None:
            render.update(pkg.viz_data)
        checkpoint_file_name.append(pkg.checkpoint_file_name)

    assert all_equal(checkpoint_file_name)

    message = [f"{mode} {training_steps} steps:"]

    metric_means = all_metrics_tracker.means()
    for k in sorted(metric_means.keys()):
        if log_writer is not None:
            log_writer.add_scalar(f"{mode}/{k}", metric_means[k], training_steps)
        message.append(k + " {:.3g}".format(metric_means[k]))

    if all_results is not None:
        results = copy.deepcopy(metric_means)
        results.update({"training_steps": training_steps, "tasks": metric_dicts_list})
        all_results.append(results)

    num_tasks = sum([pkg.num_non_empty_metrics_dicts_added for pkg in pkgs])
    if log_writer is not None:
        log_writer.add_scalar(f"{mode}/num_tasks_evaled", num_tasks, training_steps)

    message.append(
        "tasks {} checkpoint {}".format(num_tasks, checkpoint_file_name[0])
    )
    get_logger().info(" ".join(message))

    if self.visualizer is not None:
        self.visualizer.log(
            log_writer=log_writer,
            task_outputs=metric_dicts_list,
            render=render,
            num_steps=training_steps,
        )
def test_loop(id: int = 0, *engine_args, **engine_kwargs):
    OnPolicyRunner.init_process("Test", id)
    engine_kwargs["mode"] = "test"
    engine_kwargs["worker_id"] = id
    get_logger().info("test {} args {}".format(id, engine_kwargs))

    test = OnPolicyRunner.init_worker(OnPolicyInference, engine_args, engine_kwargs)
    if test is not None:
        test.process_checkpoints()  # gets checkpoints via queue
def __init__(
    self,
    env: RoboThorEnvironment,
    sensors: List[Sensor],
    task_info: Dict[str, Any],
    max_steps: int,
    reward_configs: Dict[str, Any],
    distance_cache: Optional[Dict[str, Any]] = None,
    episode_info: Optional[Dict[str, Any]] = None,
    **kwargs,
) -> None:
    super().__init__(
        env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs
    )
    self.reward_configs = reward_configs
    self._took_end_action: bool = False
    self._success: Optional[bool] = False
    self.distance_cache = distance_cache

    if episode_info:
        self.episode_optimal_corners = episode_info["shortest_path"]
        dist = episode_info["shortest_path_length"]
    else:
        self.episode_optimal_corners = self.env.path_corners(
            task_info["target"]
        )  # assume it's valid (sampler must take care)!
        dist = self.env.path_corners_to_dist(self.episode_optimal_corners)

    if dist == float("inf"):
        dist = -1.0  # -1.0 for unreachable
        get_logger().warning(
            "No path for {} from {} to {}".format(
                self.env.scene_name, self.env.agent_state(), task_info["target"]
            )
        )

    if self.distance_cache:
        self.last_geodesic_distance = get_distance(
            self.distance_cache, self.env.agent_state(), self.task_info["target"]
        )
    else:
        self.last_geodesic_distance = self.env.dist_to_point(
            self.task_info["target"]
        )

    self.optimal_distance = self.last_geodesic_distance
    self._rewards: List[float] = []
    self._distance_to_goal: List[float] = []
    self._metrics = None
    self.path: List[Any] = []  # the initial coordinate will be directly taken from the optimal path

    self.task_info["followed_path"] = [self.env.agent_state()]
    self.task_info["action_names"] = self.action_names()
    self.num_moves_made = 0
def valid_loop(id: int = 0, *engine_args, **engine_kwargs):
    OnPolicyRunner.init_process("Valid", id)
    engine_kwargs["mode"] = "valid"
    engine_kwargs["worker_id"] = id
    get_logger().info("valid {} args {}".format(id, engine_kwargs))

    valid = OnPolicyRunner.init_worker(
        engine_class=OnPolicyInference, args=engine_args, kwargs=engine_kwargs
    )
    if valid is not None:
        valid.process_checkpoints()  # gets checkpoints via queue
def _spawn_workers(
    self,
    make_sampler_fn: Callable[..., TaskSampler],
    sampler_fn_args_list: Sequence[Sequence[Dict[str, Any]]],
) -> Tuple[List[Callable[[], Any]], List[Callable[[Any], None]]]:
    parent_connections, worker_connections = zip(
        *[self._mp_ctx.Pipe(duplex=True) for _ in range(self._num_processes)]
    )
    self._workers = []
    k = 0
    id: Union[int, str]
    for id, stuff in enumerate(
        zip(worker_connections, parent_connections, sampler_fn_args_list)
    ):
        worker_conn, parent_conn, current_sampler_fn_args_list = stuff  # type: ignore
        if len(current_sampler_fn_args_list) != 1:
            id = "{}({}-{})".format(
                id, k, k + len(current_sampler_fn_args_list) - 1
            )
        k += len(current_sampler_fn_args_list)
        if self.should_log:
            get_logger().info(
                "Starting {}-th VectorSampledTask worker with args {}".format(
                    id, current_sampler_fn_args_list
                )
            )
        ps = self._mp_ctx.Process(  # type: ignore
            target=self._task_sampling_loop_worker,
            args=(
                id,
                worker_conn.recv,
                worker_conn.send,
                make_sampler_fn,
                current_sampler_fn_args_list,
                self._auto_resample_when_done,
                self.metrics_out_queue,
                self.should_log,
                worker_conn,
                parent_conn,
            ),
        )
        self._workers.append(ps)
        ps.daemon = True
        ps.start()
        worker_conn.close()
        time.sleep(
            0.1
        )  # Useful to ensure things don't lock up when spawning many envs
    return (
        [p.recv for p in parent_connections],
        [p.send for p in parent_connections],
    )
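# A minimal, self-contained sketch of the pipe-per-worker pattern above: the
# parent keeps one end of each duplex Pipe, the child gets the other, and the
# parent closes its copy of the child's end after starting the process so EOF
# propagates if the child dies. Names here are illustrative, not the module's.
import multiprocessing as mp


def _echo_worker(conn):
    while True:
        msg = conn.recv()
        if msg is None:  # sentinel: shut down
            break
        conn.send(("echo", msg))


if __name__ == "__main__":
    ctx = mp.get_context("spawn")
    parent_conn, worker_conn = ctx.Pipe(duplex=True)
    p = ctx.Process(target=_echo_worker, args=(worker_conn,))
    p.daemon = True
    p.start()
    worker_conn.close()  # the parent no longer needs the child's end
    parent_conn.send("ping")
    assert parent_conn.recv() == ("echo", "ping")
    parent_conn.send(None)
    p.join()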
def _is_goal_in_range(self) -> Optional[bool]:
    tget = self.task_info["target"]
    dist = self.dist_to_target()

    if -0.5 < dist <= 0.2:
        return True
    elif dist > 0.2:
        return False
    else:
        # A negative distance (e.g. -1.0) signals that no path to the target
        # could be found, so neither success nor failure can be determined.
        get_logger().debug(
            "No path for {} from {} to {}".format(
                self.env.scene_name, self.env.agent_state(), tget
            )
        )
        return None
def retry_dist(position: Dict[str, float], target: Dict[str, float]):
    d = self.distance_from_point_to_point(position, target, 0.05)
    if d < 0:
        get_logger().warning(
            f"Could not find a path from {position} to {target} with 0.05 error tolerance."
            f" Increasing this tolerance to 0.1 and trying again."
        )
        d = self.distance_from_point_to_point(position, target, 0.1)
        if d < 0:
            get_logger().warning(
                f"Could not find a path from {position} to {target} with 0.1 error tolerance."
                f" Returning a distance of -1."
            )
    return d
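# A generic sketch of the escalating-tolerance retry above, with a hypothetical
# `query` callable standing in for the navmesh distance call: try the strict
# setting first and only fall back to looser ones when the result is negative.
def query_with_fallback(query, tolerances=(0.05, 0.1)):
    d = -1.0
    for tol in tolerances:
        d = query(tol)
        if d >= 0:
            return d
    return d  # still negative: caller treats -1 as "unreachable"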
def worker_devices(self, mode: str):
    machine_params: MachineParams = MachineParams.instance_from(
        self.config.machine_params(mode)
    )
    devices = machine_params.devices

    assert all_equal(devices) or all(
        d.index >= 0 for d in devices
    ), f"Cannot have a mix of CPU and GPU devices (`devices == {devices}`)"

    get_logger().info(
        "Using {} {} workers on devices {}".format(len(devices), mode, devices)
    )
    return devices
def __init__(
    self,
    world_dim: int,
    world_radius: int,
    sensors: Union[SensorSuite, List[Sensor]],
    max_steps: int,
    max_tasks: Optional[int] = None,
    num_unique_seeds: Optional[int] = None,
    task_seeds_list: Optional[List[int]] = None,
    deterministic_sampling: bool = False,
    seed: Optional[int] = None,
    **kwargs,
):
    self.env = LightHouseEnvironment(world_dim=world_dim, world_radius=world_radius)

    self._last_sampled_task: Optional[FindGoalLightHouseTask] = None
    self.sensors = (
        SensorSuite(sensors) if not isinstance(sensors, SensorSuite) else sensors
    )
    self.max_steps = max_steps
    self.max_tasks = max_tasks
    self.num_tasks_generated = 0
    self.deterministic_sampling = deterministic_sampling

    self.num_unique_seeds = num_unique_seeds
    self.task_seeds_list = task_seeds_list
    assert (self.num_unique_seeds is None) or (
        0 < self.num_unique_seeds
    ), "`num_unique_seeds` must be a positive integer."

    if self.task_seeds_list is not None:
        if self.num_unique_seeds is not None:
            assert self.num_unique_seeds == len(
                self.task_seeds_list
            ), "`num_unique_seeds` must equal the length of `task_seeds_list` if both specified."
        self.num_unique_seeds = len(self.task_seeds_list)
    elif self.num_unique_seeds is not None:
        self.task_seeds_list = list(range(self.num_unique_seeds))

    assert (not deterministic_sampling) or (
        self.num_unique_seeds is not None
    ), "Cannot use deterministic sampling when `num_unique_seeds` is `None`."

    if (not deterministic_sampling) and self.max_tasks:
        get_logger().warning(
            "`deterministic_sampling` is `False` but you have specified `max_tasks < inf`,"
            " this might be a mistake when running testing."
        )

    self.seed: int = int(
        seed if seed is not None else np.random.randint(0, 2 ** 31 - 1)
    )
    self.np_seeded_random_gen: Optional[np.random.RandomState] = None
    self.set_seed(self.seed)
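# A short sketch of the seeding contract above: given the same sampler `seed`,
# a fixed `task_seeds_list` yields the same sequence of per-task seeds on every
# run, which is what makes deterministic sampling reproducible. Values here
# are illustrative only.
import numpy as np

rng = np.random.RandomState(12345)
task_seeds_list = list(range(10))  # e.g. num_unique_seeds == 10
first_run = [task_seeds_list[rng.randint(len(task_seeds_list))] for _ in range(5)]

rng = np.random.RandomState(12345)  # re-seeding reproduces the draw
second_run = [task_seeds_list[rng.randint(len(task_seeds_list))] for _ in range(5)]
assert first_run == second_run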
def dump_top_down_view(self, room_name: str, image_path: str):
    get_logger().debug("Dumping {}".format(image_path))

    self.controller.reset(room_name)
    self.controller.step(
        {"action": "Initialize", "gridSize": 0.1, "makeAgentsVisible": False}
    )
    self.controller.step({"action": "ToggleMapView"})
    top_down_view = self.controller.last_event.cv2img

    cv2.imwrite(image_path, top_down_view)
def __init__(
    self,
    env_builder: Union[str, Callable[..., MiniGridEnv]],
    sensors: Union[SensorSuite, List[Sensor]],
    max_tasks: Optional[int] = None,
    num_unique_seeds: Optional[int] = None,
    task_seeds_list: Optional[List[int]] = None,
    deterministic_sampling: bool = False,
    extra_task_kwargs: Optional[Dict] = None,
    **kwargs,
):
    super(BabyAITaskSampler, self).__init__()
    self.sensors = (
        SensorSuite(sensors) if not isinstance(sensors, SensorSuite) else sensors
    )
    self.max_tasks = max_tasks
    self.num_unique_seeds = num_unique_seeds
    self.task_seeds_list = task_seeds_list
    self.deterministic_sampling = deterministic_sampling
    self.extra_task_kwargs = (
        extra_task_kwargs if extra_task_kwargs is not None else {}
    )

    self._last_env_seed: Optional[int] = None
    self._last_task: Optional[BabyAITask] = None

    assert (self.num_unique_seeds is None) or (
        0 < self.num_unique_seeds
    ), "`num_unique_seeds` must be a positive integer."

    if self.task_seeds_list is not None:
        if self.num_unique_seeds is not None:
            assert self.num_unique_seeds == len(
                self.task_seeds_list
            ), "`num_unique_seeds` must equal the length of `task_seeds_list` if both specified."
        self.num_unique_seeds = len(self.task_seeds_list)
    elif self.num_unique_seeds is not None:
        self.task_seeds_list = list(range(self.num_unique_seeds))

    if (not deterministic_sampling) and self.max_tasks:
        get_logger().warning(
            "`deterministic_sampling` is `False` but you have specified `max_tasks < inf`,"
            " this might be a mistake when running testing."
        )

    if isinstance(env_builder, str):
        self.env = gym.make(env_builder)
    else:
        self.env = env_builder()

    self.np_seeded_random_gen, _ = seeding.np_random(random.randint(0, 2 ** 31 - 1))
    self.num_tasks_generated = 0