def handler(_signo, _frame):
    """Signal handler that closes the worker's engine (if needed) and exits.

    The outcome depends on the state of `to_close_on_termination`:

    * already closed: log and exit with status 0;
    * currently closing: log and ignore the signal;
    * otherwise: force a close, then exit with status 0, or with status 1
      if closing raised an exception.

    # Parameters

    _signo : The signal number (unused).
    _frame : The interrupted stack frame (unused).
    """
    worker_name = f"{mode}-{id}"
    prefix = f"{termination_type} signal sent to worker {worker_name}."

    if to_close_on_termination._is_closed:
        get_logger().info(
            f"{prefix} Worker {worker_name} is already closed, exiting."
        )
        sys.exit(0)

    if to_close_on_termination._is_closing:
        get_logger().info(
            f"{prefix} Worker {worker_name} is already closing, ignoring this signal."
        )
        return

    get_logger().info(
        f"{prefix} Forcing worker {worker_name} to close and exiting."
    )
    try:
        to_close_on_termination.close(True)
    except Exception:
        get_logger().error(
            f"Error occurred when closing the RL engine used by work {worker_name}."
            f" We cannot recover from this and will simply exit. The exception:"
        )
        get_logger().exception(traceback.format_exc())
        sys.exit(1)
    sys.exit(0)
def clips_to_video(clips, h, w, c):
    """Encode a sequence of video clips as a GIF wrapped in a summary image.

    The clips are concatenated, written to a temporary `.gif` file by
    moviepy, read back as bytes and the temporary file is then removed.

    # Parameters

    clips : Clips accepted by `concatenate_videoclips`.
    h : Height stored in the returned summary image.
    w : Width stored in the returned summary image.
    c : Colorspace value stored in the returned summary image.

    # Returns

    A `TBXSummary.Image` whose `encoded_image_string` is the GIF's bytes.
    """
    clip = concatenate_videoclips(clips)

    # Only the *name* of the temporary file is needed (moviepy opens the path
    # itself), so close our handle right away instead of leaking it.
    with tempfile.NamedTemporaryFile(suffix=".gif", delete=False) as tmp_file:
        filename = tmp_file.name

    try:
        # moviepy >= 1.0.0 use logger=None to suppress output.
        try:
            clip.write_gif(filename, verbose=False, logger=None)
        except TypeError:
            get_logger().warning(
                "Upgrade to moviepy >= 1.0.0 to suppress the progress bar.")
            clip.write_gif(filename, verbose=False)

        with open(filename, "rb") as f:
            tensor_string = f.read()
    finally:
        # Always try to clean up, even if encoding or reading failed.
        try:
            os.remove(filename)
        except OSError:
            get_logger().warning(
                "The temporary file used by moviepy cannot be deleted.")

    return TBXSummary.Image(height=h,
                            width=w,
                            colorspace=c,
                            encoded_image_string=tensor_string)
def train_loop(
    id: int = 0,
    checkpoint: Optional[str] = None,
    restart_pipeline: bool = False,
    *engine_args,
    **engine_kwargs,
):
    """Entry point of a training worker.

    Creates an `OnPolicyTrainer` through `OnPolicyRunner.init_worker` and,
    if one was created, registers it for closing on termination and starts
    training.

    # Parameters

    id : The worker id, forwarded to the engine as `worker_id`.
    checkpoint : Passed to `trainer.train` as `checkpoint_file_name`.
    restart_pipeline : Passed through to `trainer.train`.
    engine_args : Positional arguments for the engine constructor.
    engine_kwargs : Keyword arguments for the engine constructor; `mode`
        and `worker_id` are overwritten here.
    """
    engine_kwargs.update(mode=TRAIN_MODE_STR, worker_id=id)

    # The model state dict is far too large to be printed usefully.
    printable_kwargs = dict(engine_kwargs)
    if "initial_model_state_dict" in printable_kwargs:
        printable_kwargs["initial_model_state_dict"] = "[SUPPRESSED]"
    get_logger().info(f"train {id} args {printable_kwargs}")

    trainer: OnPolicyTrainer = OnPolicyRunner.init_worker(
        engine_class=OnPolicyTrainer, args=engine_args, kwargs=engine_kwargs
    )
    if trainer is None:
        return

    OnPolicyRunner.init_process("Train", id, to_close_on_termination=trainer)
    trainer.train(
        checkpoint_file_name=checkpoint, restart_pipeline=restart_pipeline
    )
def _generate_and_record_expert_action(self):
    """Generate the next greedy expert action and save it to the `expert_action_list`.

    If an action was already recorded for the current step, a warning is
    logged and nothing is appended. When the generated action name is not
    among the task's action names, the snake-cased object type (the part of
    `objectId` before the first `|`) is appended to it. An action that is
    still unknown is logged as an error and recorded as `None`.
    """
    steps_taken = self.task.num_steps_taken()
    num_recorded = len(self.expert_action_list)

    if steps_taken == num_recorded + 1:
        get_logger().warning(
            f"Already generated the expert action at step {steps_taken}"
        )
        return

    assert steps_taken == num_recorded, f"{steps_taken} != {num_recorded}"

    action_dict = self._generate_expert_action_dict()
    action_str = stringcase.snakecase(action_dict["action"])
    action_names = self.task.action_names()

    if action_str not in action_names:
        obj_type = stringcase.snakecase(action_dict["objectId"].split("|")[0])
        action_str = f"{action_str}_{obj_type}"

    try:
        action_index = action_names.index(action_str)
    except ValueError:
        get_logger().error(
            f"{action_str} is not a valid action for the given task.")
        action_index = None

    self.expert_action_list.append(action_index)
def my_summary_iterator(path):
    """Yield the events stored in the TF record file at `path`.

    Every record produced by `tf_record.tf_record_iterator` is parsed with
    `event_pb2.Event.FromString`. An `IOError` raised while iterating ends
    the generator quietly after a debug-level log message.
    """
    try:
        for record in tf_record.tf_record_iterator(path):
            yield event_pb2.Event.FromString(record)
    except IOError:
        get_logger().debug(f"IOError for path {path}")
def judge(self) -> float:
    """Compute, record and return the reward for the current step.

    The reward is a constant step penalty of -0.01, plus the decrease in
    geodesic distance to the target since the previous step, plus (when the
    task is done) 10.0 on success or `self.failed_end_reward` otherwise.
    An unusable new distance (`None`, infinite or NaN) is replaced by the
    previous one. If no distance is available at all, only the step penalty
    is returned and a warning is logged.

    # Returns

    The reward as a `float`; it is also appended to `self._rewards`.
    """
    reward = -0.01

    current_distance = self.current_geodesic_dist_to_target()
    if self.last_geodesic_distance is None:
        self.last_geodesic_distance = current_distance

    previous_distance = self.last_geodesic_distance
    if previous_distance is None:
        get_logger().warning(
            "Could not get geodesic distance from habitat env.")
    else:
        # The `None` test must come first: `np.isnan(None)` would raise.
        unusable = (
            current_distance is None
            or current_distance in (float("-inf"), float("inf"))
            or np.isnan(current_distance)
        )
        if unusable:
            current_distance = previous_distance

        reward += previous_distance - current_distance
        self.last_geodesic_distance = current_distance

        if self.is_done():
            if self._success:
                reward += 10.0
            else:
                reward += self.failed_end_reward

    reward = float(reward)
    self._rewards.append(reward)
    return reward
def get_translator(self) -> Dict[str, Any]:
    """Collect map data, including a position translator, for every scene.

    For each scene yielded by `ThorViz.iterate_scenes(self.scenes)` the map
    data is loaded from its cached JSON file when that file exists.
    Otherwise it is obtained from the controller and dumped to that file.
    A `ThorPositionTo2DFrameTranslator` is then stored in the map data under
    the `"pos_translator"` key.

    # Returns

    A dictionary mapping each scene name to its map data.
    """
    map_data_per_scene: Dict[str, Any] = {}

    for scene in ThorViz.iterate_scenes(self.scenes):
        cache_path = self.cached_map_data_path(scene)

        if os.path.exists(cache_path):
            with open(cache_path, "r") as cache_file:
                scene_data = json.load(cache_file)
        else:
            self.make_controller()
            self.controller.reset(scene)
            scene_data = self.get_agent_map_data()
            get_logger().info(f"Dumping {cache_path}")
            with open(cache_path, "w") as cache_file:
                json.dump(scene_data, cache_file, indent=4, sort_keys=True)

        scene_data["pos_translator"] = ThorPositionTo2DFrameTranslator(
            self.viz_rows_cols,
            self.position_to_tuple(scene_data["cam_position"]),
            scene_data["cam_orth_size"],
        )
        map_data_per_scene[scene] = scene_data

    get_logger().debug(f"Using map_data {map_data_per_scene}")
    return map_data_per_scene
def loss_per_step(  # type: ignore
    self,
    step_count: int,
    batch: ObservationType,
    actor_critic_output: ActorCriticOutput[CategoricalDistr],
) -> Dict[str, Tuple[torch.Tensor, Optional[float]]]:
    """Compute the per-step value, action and entropy loss terms.

    # Parameters

    step_count : Not used by this implementation.
    batch : Must contain the entries `"actions"`, `"adv_targ"` and
        `"returns"`.
    actor_critic_output : Provides the `values` and the action
        `distributions`.

    # Returns

    A dictionary mapping `"value"`, `"action"` and `"entropy"` to a
    `(loss tensor, coefficient)` pair. The action loss has no coefficient
    (`None`), and the entropy is negated in place before being returned.
    """
    actions = cast(torch.LongTensor, batch["actions"])
    values = actor_critic_output.values
    distributions = actor_critic_output.distributions

    log_probs = distributions.log_prob(actions)

    # Append singleton dimensions so that the log probabilities have as many
    # dimensions as the advantage targets.
    advantages = cast(torch.Tensor, batch["adv_targ"])
    num_missing_dims = len(advantages.shape) - len(log_probs.shape)
    log_probs = log_probs.view(log_probs.shape + (1,) * num_missing_dims)

    dist_entropy: torch.FloatTensor = distributions.entropy()

    returns = cast(torch.FloatTensor, batch["returns"])
    value_loss = 0.5 * (returns - values).pow(2)

    # TODO: Decided not to use normalized advantages here,
    #   is this correct? (it's how it's done in Kostrikov's)
    action_loss = -(cast(torch.FloatTensor, advantages).detach() * log_probs)

    if self.acktr:
        # TODO: Currently acktr doesn't really work because of this natural gradient stuff
        #   that we should figure out how to integrate properly.
        get_logger().warning("acktr is only partially supported.")

    losses = {
        "value": (value_loss, self.value_loss_coef),
        "action": (action_loss, None),
        "entropy": (dist_entropy.mul_(-1.0), self.entropy_coef),  # type: ignore
    }
    return losses
def decorated(*args, **kwargs):
    """Call `to_decorate`, logging a deprecation warning on the first call only."""
    already_warned = have_warned[0]
    if not already_warned:
        message = (
            f"'{name}' has been deprecated and will soon be removed from AllenAct's API."
            f" Please discontinue your use of this function."
        )
        get_logger().warning(message)
        have_warned[0] = True
    return to_decorate(*args, **kwargs)
def create_minigrid_offpolicy_data_iterator(
    path: str,
    nrollouts: int,
    rollout_len: int,
    instr_len: Optional[int],
    restrict_max_steps_in_dataset: Optional[int] = None,
    current_worker: Optional[int] = None,
    num_workers: Optional[int] = None,
) -> ExpertTrajectoryIterator:
    """Create an `ExpertTrajectoryIterator` over a (cached) minigrid dataset.

    The dataset is loaded with `babyai.utils.load_demos` the first time an
    absolute path is requested and kept in `_DATASET_CACHE` afterwards.

    # Parameters

    path : Location of the dataset; converted to an absolute path.
    nrollouts : Forwarded to the iterator.
    rollout_len : Forwarded to the iterator.
    instr_len : Forwarded to the iterator.
    restrict_max_steps_in_dataset : Forwarded to the iterator.
    current_worker : Forwarded to the iterator; must be given if and only
        if `num_workers` is given.
    num_workers : Forwarded to the iterator.

    # Returns

    An `ExpertTrajectoryIterator` reading from the cached dataset.
    """
    path = os.path.abspath(path)

    assert (current_worker is None) == (
        num_workers is None
    ), "both current_worker and num_workers must be simultaneously defined or undefined"

    if path not in _DATASET_CACHE:
        get_logger().info(
            f"Loading minigrid dataset from {path} for first time...")
        demos = babyai.utils.load_demos(path)
        # Stored before validation, exactly as the cache was filled before.
        _DATASET_CACHE[path] = demos
        assert demos is not None and len(demos) != 0
        get_logger().info(
            f"Loading minigrid dataset complete, it contains {len(demos)} trajectories")

    return ExpertTrajectoryIterator(
        data=_DATASET_CACHE[path],
        nrollouts=nrollouts,
        rollout_len=rollout_len,
        instr_len=instr_len,
        restrict_max_steps_in_dataset=restrict_max_steps_in_dataset,
        current_worker=current_worker,
        num_workers=num_workers,
    )
def _check_contains_key(self, key: Tuple[float, float, int, int], add_if_not=True):
    """Warn when `key` is missing from the graph and optionally add it.

    # Parameters

    key : The node key to look up in `self.graph`.
    add_if_not : If `True`, a missing key is added to the graph via
        `self._add_node_to_graph`.
    """
    if key in self.graph:
        return

    get_logger().warning(
        f"{key} was not in the graph for scene {self.scene_name}."
    )
    if add_if_not:
        self._add_node_to_graph(self.graph, key)
def randomize_agent_location(
        self,
        seed: int = None,
        partial_position: Optional[Dict[str, float]] = None) -> Dict:
    """Teleports the agent to a random reachable location in the scene.

    Up to 10 random reachable states are tried. If none of the teleports
    succeeds, the last sampled state is forced.

    # Parameters

    seed : Forwarded to `self.random_reachable_state`.
    partial_position : Entries overriding those of the sampled state when
        teleporting.

    # Returns

    The sampled state (without the `partial_position` overrides applied).
    """
    overrides = {} if partial_position is None else partial_position

    state: Optional[Dict] = None
    for _ in range(10):
        state = self.random_reachable_state(seed=seed)
        self.teleport_agent_to(**{**state, **overrides})
        if self.last_action_success:
            break

    if not self.last_action_success:
        get_logger().warning(
            ("Randomize agent location in scene {}"
             " with seed {} and partial position {} failed in "
             "10 attempts. Forcing the action.").format(
                 self.scene_name, seed, overrides))
        self.teleport_agent_to(
            **{**state, **overrides}, force_action=True)  # type: ignore
        assert self.last_action_success

    assert state is not None
    return state
def find_distance(
    self,
    scene_name: str,
    position: Dict[str, Any],
    target: Union[Dict[str, Any], str],
    native_distance_function: Callable[
        [Dict[str, Any], Union[Dict[str, Any], str]], float
    ],
) -> float:
    """Return the distance from `position` to `target`, caching the result.

    # Parameters

    scene_name : Name of the scene; it prefixes the position's cache key.
    position : The start position, converted to a key by `self._pos_to_str`.
    target : Either a string, used directly as the cache key, or a position
        that is converted by `self._pos_to_str`.
    native_distance_function : Called as
        `native_distance_function(position, target)` on a cache miss.

    # Returns

    The cached or freshly computed distance.
    """
    # Convert the position to its rounded string representation
    position_str = scene_name + self._pos_to_str(position)
    # If the target is also a position, convert it to its rounded string representation
    target_str = target if isinstance(target, str) else self._pos_to_str(target)

    if position_str not in self.cache:
        self.cache[position_str] = {}
    distances_from_position = self.cache[position_str]

    if target_str in distances_from_position:
        self.hits += 1
    else:
        distances_from_position[target_str] = native_distance_function(
            position, target
        )
        self.misses += 1

    self.num_accesses += 1
    if self.num_accesses % 1000 == 0:
        # There may be no hits yet (e.g. 1000 distinct queries); report an
        # infinite ratio rather than failing with a ZeroDivisionError.
        if self.hits:
            miss_hit_ratio = self.misses / self.hits
        else:
            miss_hit_ratio = float("inf")
        get_logger().debug("Cache Miss-Hit Ratio: %.4f" % miss_hit_ratio)

    return distances_from_position[target_str]
def _create_generators(
    self,
    make_sampler_fn: Callable[..., TaskSampler],
    sampler_fn_args: Sequence[Dict[str, Any]],
) -> List[Generator]:
    """Create and start one task sampling generator per set of sampler args.

    # Parameters

    make_sampler_fn : Forwarded to every generator.
    sampler_fn_args : One dictionary of sampler arguments per generator;
        the position in this sequence is used as the worker id.

    # Returns

    The list of started generators.

    # Raises

    RuntimeError : If the first value produced by a generator is not
        `"started"`.
    """
    generators = []
    for worker_index, current_args in enumerate(sampler_fn_args):
        if self.should_log:
            get_logger().info(
                f"Starting {worker_index}-th SingleProcessVectorSampledTasks generator with args {current_args}"
            )

        generator = self._task_sampling_loop_generator_fn(
            worker_id=worker_index,
            make_sampler_fn=make_sampler_fn,
            sampler_fn_args=current_args,
            auto_resample_when_done=self._auto_resample_when_done,
            should_log=self.should_log,
        )
        generators.append(generator)

        if next(generator) != "started":
            raise RuntimeError("Generator failed to start.")

    return generators
def layer_init(self):
    """Initialize the weights and biases of all conv and linear layers.

    Weights of every `nn.Conv2d` / `nn.Linear` module are drawn with
    Kaiming-normal initialization for ReLU nonlinearities; biases, when
    present, are set to zero.
    """
    for layer in self.modules():
        if isinstance(layer, (nn.Conv2d, nn.Linear)):
            # The second positional parameter of `kaiming_normal_` is `a`,
            # the negative slope used for leaky ReLU, not a gain. Passing
            # `calculate_gain("relu")` there silently shrank the weights, so
            # the nonlinearity is now requested explicitly.
            nn.init.kaiming_normal_(layer.weight, nonlinearity="relu")
            if layer.bias is not None:
                nn.init.constant_(layer.bias, val=0)
    get_logger().info("initialize resnet encoder")
def _add_node_to_graph(
    self,
    graph: nx.DiGraph,
    s: Tuple[int, int, int],
    valid_node_types: Tuple[str, ...],
    attr_dict: Dict[Any, Any] = None,
    include_rotation_free_leaves: bool = False,
):
    """Add the node `s` to `graph` and connect it to its valid neighbors.

    Nothing happens if `s` is already in the graph.

    # Parameters

    graph : The graph to modify.
    s : The node to add; its last entry is treated as a rotation index
        (neighbors are computed modulo 4).
    valid_node_types : Node types between which edges are created.
    attr_dict : Attributes stored on the new node. If `None`, a warning is
        logged and the node is added without attributes (and therefore
        without neighbor edges).
    include_rotation_free_leaves : If `True`, also add a node equal to `s`
        with its last entry replaced by `None`, plus an edge from `s` to it
        with `action="NA"`.
    """
    if s in graph:
        return

    if attr_dict is None:
        get_logger().warning(
            "adding a node with neighbor checks and no attributes")
        # Previously `**None` below raised a TypeError right after this
        # warning; fall back to an attribute-less node instead.
        attr_dict = {}
    graph.add_node(s, **attr_dict)

    if include_rotation_free_leaves:
        rot_free_leaf = (*s[:-1], None)
        if rot_free_leaf not in graph:
            graph.add_node(rot_free_leaf)
        graph.add_edge(s, rot_free_leaf, action="NA")

    # `.get` is used because nodes added without attributes have no "type".
    if attr_dict.get("type") in valid_node_types:
        for o in self.possible_neighbor_offsets():
            t = (s[0] + o[0], s[1] + o[1], (s[2] + o[2]) % 4)
            if t in graph and graph.nodes[t].get("type") in valid_node_types:
                self._add_from_to_edge(graph, s, t)
                self._add_from_to_edge(graph, t, s)
def process_eval_package(self, log_writer: Optional[SummaryWriter],
                         pkg: LoggingPackage):
    """Log the metrics of one evaluation package.

    The number of evaluated tasks and the mean of every metric are written
    to `log_writer` (when one is given) and summarized in a single info log
    message. The package is finally handed to `self.visualizer`, if any.

    # Parameters

    log_writer : Summary writer receiving the scalars, or `None`.
    pkg : The logging package to process.
    """
    mode = pkg.mode
    training_steps = pkg.training_steps
    num_tasks = pkg.num_non_empty_metrics_dicts_added
    metric_means = pkg.metrics_tracker.means()
    have_writer = log_writer is not None

    if have_writer:
        log_writer.add_scalar(f"{mode}-misc/num_tasks_evaled", num_tasks,
                              training_steps)

    message = [f"{mode} {training_steps} steps:"]
    for metric_name in sorted(metric_means.keys()):
        metric_value = metric_means[metric_name]
        if have_writer:
            log_writer.add_scalar(f"{mode}-metrics/{metric_name}",
                                  metric_value, training_steps)
        message.append(f"{metric_name} {metric_value}")
    message.append(
        f"tasks {num_tasks} checkpoint {pkg.checkpoint_file_name}")
    get_logger().info(" ".join(message))

    if self.visualizer is None:
        return
    self.visualizer.log(
        log_writer=log_writer,
        task_outputs=pkg.metric_dicts,
        render=pkg.viz_data,
        num_steps=training_steps,
    )
def log(
    self,
    log_writer: Optional[SummaryWriter],
    task_outputs: Optional[List[Any]],
    render: Optional[Dict[str, List[Dict[str, Any]]]],
    num_steps: int,
):
    """Log one group of trajectory figures per page of the visualization order.

    # Parameters

    log_writer : Summary writer receiving the figures. If `None`, nothing
        is drawn or logged.
    task_outputs : Passed to `self._auto_viz_order` to determine which
        episodes to visualize.
    render : Not used by this implementation.
    num_steps : Used as the `global_step` of the logged figures.
    """
    viz_order, all_episodes = self._auto_viz_order(task_outputs)
    if viz_order is None:
        get_logger().debug("trajectory viz returning without visualizing")
        return

    # Callers that treat the writer as optional may pass `None`; without a
    # writer there is nowhere to send the figures, so skip creating them
    # instead of failing on `log_writer.add_figure`.
    if log_writer is None:
        get_logger().debug(
            "trajectory viz has no log writer, skipping figure logging")
        return

    for page, current_ids in enumerate(viz_order):
        figs = []
        for episode_id in current_ids:
            if episode_id not in all_episodes:
                get_logger().warning(
                    "skipping viz for missing episode {}".format(episode_id)
                )
                continue
            figs.append(self.make_fig(all_episodes[episode_id], episode_id))
        if len(figs) == 0:
            continue
        log_writer.add_figure(
            "{}/{}_group{}".format(self.mode, self.label, page),
            figs,
            global_step=num_steps,
        )
        # close all current figures (SummaryWriter already closes all figures we log)
        plt.close("all")
def checkpoint_start_time_str(checkpoint_file_name):
    """Return the name of the directory that directly contains the checkpoint.

    The path is split on `os.path.sep` and the second to last component is
    returned (and logged) as the checkpoint's start time string.

    # Parameters

    checkpoint_file_name : Path of the checkpoint file; it must contain at
        least one path separator.
    """
    path_components = checkpoint_file_name.split(os.path.sep)
    assert (
        len(path_components) > 1
    ), f"{checkpoint_file_name} is not a valid checkpoint path"

    start_time = path_components[-2]
    get_logger().info(f"Using checkpoint start time {start_time}")
    return start_time
def stop(self) -> None:
    """Stops the ai2thor controller.

    Any exception raised by the controller while stopping is logged as a
    warning rather than propagated, and `self._started` is reset to `False`
    in every case.
    """
    try:
        self.controller.stop()
    except Exception as stop_error:
        # Best effort: a failing controller must not prevent shutdown.
        get_logger().warning(str(stop_error))
    finally:
        self._started = False
def logif(s: Union[str, Exception]):
    """Log `s` only when `verbose` is enabled.

    Strings are logged at info level. For exceptions the currently formatted
    traceback is logged with `exception`.

    # Raises

    NotImplementedError : If `verbose` is set and `s` is neither a string
        nor an exception.
    """
    if not verbose:
        return

    if isinstance(s, str):
        get_logger().info(s)
        return
    if isinstance(s, Exception):
        get_logger().exception(traceback.format_exc())
        return
    raise NotImplementedError()
def process_test_packages(
    self,
    log_writer: Optional[SummaryWriter],
    pkgs: List[LoggingPackage],
    all_results: Optional[List[Any]] = None,
):
    """Aggregate and log the metrics of the test packages of one checkpoint.

    # Parameters

    log_writer : Summary writer receiving the scalars, or `None`.
    pkgs : Non-empty list of packages in `"test"` mode that all refer to
        the same checkpoint.
    all_results : If given, a dictionary with the aggregated metric means,
        the training steps and the per-task metrics is appended to it.
    """
    first_pkg = pkgs[0]
    mode = first_pkg.mode
    assert mode == "test"
    training_steps = first_pkg.training_steps

    all_metrics_tracker = ScalarMeanTracker()
    metric_dicts_list = []
    render = {}
    checkpoint_names = []
    for pkg in pkgs:
        tracker = pkg.metrics_tracker
        all_metrics_tracker.add_scalars(
            scalars=tracker.means(), n=tracker.counts())
        metric_dicts_list.extend(pkg.metric_dicts)
        if pkg.viz_data is not None:
            render.update(pkg.viz_data)
        checkpoint_names.append(pkg.checkpoint_file_name)

    assert all_equal(checkpoint_names)

    have_writer = log_writer is not None
    message = [f"{mode} {training_steps} steps:"]

    metric_means = all_metrics_tracker.means()
    for metric_name in sorted(metric_means.keys()):
        metric_value = metric_means[metric_name]
        if have_writer:
            log_writer.add_scalar(f"{mode}-metrics/{metric_name}",
                                  metric_value, training_steps)
        message.append(metric_name + f" {metric_value:.3g}")

    if all_results is not None:
        results = copy.deepcopy(metric_means)
        results.update(
            {"training_steps": training_steps, "tasks": metric_dicts_list})
        all_results.append(results)

    num_tasks = sum(pkg.num_non_empty_metrics_dicts_added for pkg in pkgs)
    if have_writer:
        log_writer.add_scalar(f"{mode}-misc/num_tasks_evaled", num_tasks,
                              training_steps)

    message.append(f"tasks {num_tasks} checkpoint {checkpoint_names[0]}")
    get_logger().info(" ".join(message))

    if self.visualizer is None:
        return
    self.visualizer.log(
        log_writer=log_writer,
        task_outputs=metric_dicts_list,
        render=render,
        num_steps=training_steps,
    )
def main():
    """Parse the command line arguments and start training or evaluation.

    An `OnPolicyRunner` is created in `"test"` mode when `args.eval` is
    truthy and in `"train"` mode otherwise, and the matching `start_test` /
    `start_train` method is invoked.
    """
    args = get_args()

    init_logging(args.log_level)

    get_logger().info("Running with args {}".format(args))

    # Use one and the same test for the process title and for the dispatch
    # below. The title used to check `args.eval is None`, which reports
    # "Evaluation" for a falsy but non-`None` value (e.g. `False`) even
    # though training is started.
    evaluating = bool(args.eval)

    ptitle("Master: {}".format("Evaluation" if evaluating else "Training"))

    cfg, srcs = load_config(args)

    runner = OnPolicyRunner(
        config=cfg,
        output_dir=args.output_dir,
        save_dir_fmt=args.save_dir_fmt,
        loaded_config_src_files=srcs,
        seed=args.seed,
        mode="test" if evaluating else "train",
        deterministic_cudnn=args.deterministic_cudnn,
        deterministic_agents=args.deterministic_agents,
        extra_tag=args.extra_tag,
        disable_tensorboard=args.disable_tensorboard,
        disable_config_saving=args.disable_config_saving,
        distributed_ip_and_port=args.distributed_ip_and_port,
        machine_id=args.machine_id,
    )

    if evaluating:
        runner.start_test(
            checkpoint_path_dir_or_pattern=args.checkpoint,
            infer_output_dir=args.infer_output_dir,
            approx_ckpt_step_interval=args.approx_ckpt_step_interval,
            max_sampler_processes_per_worker=args.max_sampler_processes_per_worker,
            inference_expert=args.test_expert,
        )
    else:
        runner.start_train(
            checkpoint=args.checkpoint,
            restart_pipeline=args.restart_pipeline,
            max_sampler_processes_per_worker=args.max_sampler_processes_per_worker,
            collect_valid_results=args.collect_valid_results,
        )
def decorated(*args, **kwargs):
    """Call `to_decorate`, logging an experimental-API warning on the first call only."""
    already_warned = have_warned[0]
    if not already_warned:
        message = (
            f"'{name}' is a part of AllenAct's experimental API."
            f" This means: (1) there are likely bugs present and (2)"
            f" we may remove/change this functionality without warning."
            f" USE AT YOUR OWN RISK."
        )
        get_logger().warning(message)
        have_warned[0] = True
    return to_decorate(*args, **kwargs)
def find_closest_inverse(deg):
    """Return an inverse rotation matrix for a rotation of `deg` degrees.

    The first saved matrix whose angle is less than 5 degrees away from
    `deg` is reused. If there is none, the matrix is computed from a
    rotation of `deg` degrees about the second ("y") euler axis and a
    warning is logged. The computed matrix is not saved.
    """
    for saved_deg, saved_inverse in _saved_inverse_rotation_mats.items():
        if abs(saved_deg - deg) < 5:
            return saved_inverse

    # Reaching this point means that no close enough angle has been saved.
    rotation_matrix = R.from_euler("xyz", [0, deg, 0], degrees=True).as_matrix()
    inverse = inverse_rot_trans_matrix(rotation_matrix)
    get_logger().warning(f"Had to calculate the matrix for {deg}")
    return inverse
def multiprocessing_safe_download_file_from_url(url: str, save_path: str):
    """Download `url` to `save_path` unless that file already exists.

    The check and the download both happen while holding a file lock on
    `save_path + ".lock"`.

    # Parameters

    url : The URL to download from.
    save_path : Where the downloaded file is stored.
    """
    download_lock = filelock.FileLock(save_path + ".lock")
    with download_lock:
        if os.path.isfile(save_path):
            get_logger().debug(f"{save_path} exists - skipping download.")
        else:
            get_logger().info(f"Downloading file from {url} to {save_path}.")
            urllib.request.urlretrieve(url, save_path)
def test_loop(id: int = 0, *engine_args, **engine_kwargs):
    """Entry point of a test worker.

    Creates an `OnPolicyInference` engine through
    `OnPolicyRunner.init_worker` and, if one was created, lets it process
    checkpoints.

    # Parameters

    id : The worker id, forwarded to the engine as `worker_id`.
    engine_args : Positional arguments for the engine constructor.
    engine_kwargs : Keyword arguments for the engine constructor; `mode`
        and `worker_id` are overwritten here.
    """
    OnPolicyRunner.init_process("Test", id)
    engine_kwargs.update(mode="test", worker_id=id)
    get_logger().info(f"test {id} args {engine_kwargs}")

    inference_engine = OnPolicyRunner.init_worker(
        OnPolicyInference, engine_args, engine_kwargs)
    if inference_engine is None:
        return
    inference_engine.process_checkpoints()  # gets checkpoints via queue
def __init__(
    self,
    action_space: Optional[Union[gym.Space, int]] = None,
    uuid: str = "expert_sensor_type_uuid",
    expert_args: Optional[Dict[str, Any]] = None,
    nactions: Optional[int] = None,
    use_dict_as_groups: bool = True,
    **kwargs: Any,
) -> None:
    """Initialize an `AbstractExpertSensor`.

    # Parameters

    action_space : The action space of the agent. This is necessary in order for this sensor
        to know what its output observation space is. An `int` is converted to
        `gym.spaces.Discrete(action_space)`.
    uuid : A string specifying the unique ID of this sensor.
    expert_args : This sensor obtains an expert action from the task by calling the `query_expert`
        method of the task. `expert_args` are any keyword arguments that should be passed to the
        `query_expert` method when called. The key `expert_sensor_group_name` is reserved and
        must not be used.
    nactions : [DEPRECATED] The number of actions available to the agent, corresponds to an `action_space`
        of `gym.spaces.Discrete(nactions)`. Only consulted when `action_space` is `None`.
    use_dict_as_groups : Whether to use the top-level action_space of type `gym.spaces.Dict` as action groups.
    """
    # Normalize the action space: accept a plain integer, or fall back to the
    # deprecated `nactions` parameter when no action space was given.
    if isinstance(action_space, int):
        action_space = gym.spaces.Discrete(action_space)
    elif action_space is None:
        assert (
            nactions is not None
        ), "One of `action_space` or `nactions` must be not `None`."
        get_logger().warning(
            "The `nactions` parameter to `AbstractExpertSensor` is deprecated and will be removed, please use"
            " the `action_space` parameter instead."
        )
        action_space = gym.spaces.Discrete(nactions)

    self.action_space = action_space

    # Groups are only used for a top-level `gym.spaces.Dict` action space.
    self.use_groups = (
        isinstance(action_space, gym.spaces.Dict) and use_dict_as_groups
    )

    # Without groups, the whole action space is stored as a single group
    # under the `_NO_GROUPS_LABEL` key.
    self.group_spaces = (
        self.action_space
        if self.use_groups
        else OrderedDict([(self._NO_GROUPS_LABEL, self.action_space,)])
    )

    self.expert_args: Dict[str, Any] = expert_args or {}

    assert (
        "expert_sensor_group_name" not in self.expert_args
    ), "`expert_sensor_group_name` is reserved for `AbstractExpertSensor`"

    # NOTE(review): `observation_space` is not referenced by name below; it is
    # presumably picked up through `locals()` by `prepare_locals_for_super` -
    # confirm before renaming or removing any local variable of this method.
    observation_space = self._get_observation_space()

    super().__init__(**prepare_locals_for_super(locals()))
def __init__(
    self,
    world_dim: int,
    world_radius: int,
    sensors: Union[SensorSuite, List[Sensor]],
    max_steps: int,
    max_tasks: Optional[int] = None,
    num_unique_seeds: Optional[int] = None,
    task_seeds_list: Optional[List[int]] = None,
    deterministic_sampling: bool = False,
    seed: Optional[int] = None,
    **kwargs,
):
    """Set up the environment, sensors and seeding of the task sampler.

    # Parameters

    world_dim : Forwarded to `LightHouseEnvironment`.
    world_radius : Forwarded to `LightHouseEnvironment`.
    sensors : A `SensorSuite`, or a list of sensors that is wrapped in one.
    max_steps : Stored as `self.max_steps`.
    max_tasks : Stored as `self.max_tasks`.
    num_unique_seeds : If given, must be positive and, when
        `task_seeds_list` is also given, equal to its length. Without a
        `task_seeds_list`, the seeds `0, ..., num_unique_seeds - 1` are used.
    task_seeds_list : Explicit list of task seeds.
    deterministic_sampling : Requires the number of unique seeds to be
        known (through either of the two previous parameters).
    seed : Seed passed to `self.set_seed`; drawn with `np.random.randint`
        when `None`.
    kwargs : Ignored.
    """
    self.env = LightHouseEnvironment(world_dim=world_dim,
                                     world_radius=world_radius)

    self._last_sampled_task: Optional[FindGoalLightHouseTask] = None
    if isinstance(sensors, SensorSuite):
        self.sensors = sensors
    else:
        self.sensors = SensorSuite(sensors)
    self.max_steps = max_steps
    self.max_tasks = max_tasks
    self.num_tasks_generated = 0
    self.deterministic_sampling = deterministic_sampling

    self.num_unique_seeds = num_unique_seeds
    self.task_seeds_list = task_seeds_list
    assert (num_unique_seeds is None) or (
        0 < num_unique_seeds
    ), "`num_unique_seeds` must be a positive integer."

    if task_seeds_list is not None:
        if num_unique_seeds is not None:
            assert num_unique_seeds == len(
                task_seeds_list
            ), "`num_unique_seeds` must equal the length of `task_seeds_list` if both specified."
        self.num_unique_seeds = len(task_seeds_list)
    elif num_unique_seeds is not None:
        self.task_seeds_list = list(range(num_unique_seeds))

    assert (not deterministic_sampling) or (
        self.num_unique_seeds is not None
    ), "Cannot use deterministic sampling when `num_unique_seeds` is `None`."

    if (not deterministic_sampling) and self.max_tasks:
        get_logger().warning(
            "`deterministic_sampling` is `False` but you have specified `max_tasks < inf`,"
            " this might be a mistake when running testing.")

    if seed is None:
        seed = np.random.randint(0, 2**31 - 1)
    self.seed: int = int(seed)

    self.np_seeded_random_gen: Optional[np.random.RandomState] = None
    self.set_seed(self.seed)
def dump_top_down_view(self, room_name: str, image_path: str):
    """Write a top-down view image of `room_name` to `image_path`.

    The controller is reset to the room, initialized with a grid size of
    0.1 and invisible agents, and switched to the map view. The `cv2img` of
    the resulting event is then written with `cv2.imwrite`.
    """
    get_logger().debug(f"Dumping {image_path}")

    self.controller.reset(room_name)
    setup_actions = (
        {"action": "Initialize", "gridSize": 0.1, "makeAgentsVisible": False},
        {"action": "ToggleMapView"},
    )
    for action in setup_actions:
        self.controller.step(action)

    cv2.imwrite(image_path, self.controller.last_event.cv2img)