def create_model(cls, **kwargs) -> nn.Module:
    return NavToPartnerActorCriticSimpleConvRNN(
        action_space=gym.spaces.Tuple(
            [
                gym.spaces.Discrete(len(NavToPartnerTask.class_action_names())),
                gym.spaces.Discrete(len(NavToPartnerTask.class_action_names())),
            ]
        ),
        observation_space=SensorSuite(cls.SENSORS).observation_spaces,
        hidden_size=512,
    )
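The model receives a `Tuple` action space with one `Discrete` sub-space per agent. A minimal sketch of what that space looks like and what a sample from it is; the action count of 4 below is a placeholder, where the real code uses `len(NavToPartnerTask.class_action_names())`:

```python
import gym

# Two agents, each with the same discrete action set (4 is a placeholder count).
action_space = gym.spaces.Tuple(
    [gym.spaces.Discrete(4), gym.spaces.Discrete(4)]
)

sample = action_space.sample()  # e.g. (2, 0): one action index per agent
print(sample)
```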
def _get_sampler_args_for_scene_split(
    self,
    scenes: List[str],
    process_ind: int,
    total_processes: int,
    seeds: Optional[List[int]] = None,
    deterministic_cudnn: bool = False,
) -> Dict[str, Any]:
    if total_processes > len(scenes):  # oversample some scenes -> bias
        if total_processes % len(scenes) != 0:
            print(
                "Warning: oversampling some of the scenes to feed all processes."
                " You can avoid this by setting a number of workers divisible by the number of scenes"
            )
        scenes = scenes * int(ceil(total_processes / len(scenes)))
        scenes = scenes[: total_processes * (len(scenes) // total_processes)]
    else:
        if len(scenes) % total_processes != 0:
            print(
                "Warning: oversampling some of the scenes to feed all processes."
                " You can avoid this by setting a number of workers that evenly divides the number of scenes"
            )

    # Split the (possibly oversampled) scene list into contiguous chunks,
    # one per process.
    inds = self._partition_inds(len(scenes), total_processes)

    return {
        "scenes": scenes[inds[process_ind] : inds[process_ind + 1]],
        "max_steps": self.MAX_STEPS,
        "sensors": self.SENSORS,
        "action_space": gym.spaces.Tuple(
            [
                gym.spaces.Discrete(len(NavToPartnerTask.class_action_names())),
                gym.spaces.Discrete(len(NavToPartnerTask.class_action_names())),
            ]
        ),
        "seed": seeds[process_ind] if seeds is not None else None,
        "deterministic_cudnn": deterministic_cudnn,
        "rewards_config": self.REWARD_CONFIG,
    }
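The split logic above oversamples the scene list when there are more worker processes than scenes, trims it to a multiple of the process count, and then hands each process a contiguous chunk. A self-contained sketch of that arithmetic, where `partition_inds` is a hypothetical stand-in for the framework's `self._partition_inds`:

```python
from math import ceil
import numpy as np

def partition_inds(n: int, num_parts: int) -> np.ndarray:
    # Evenly spaced (rounded) chunk boundaries over n items.
    return np.round(np.linspace(0, n, num_parts + 1, endpoint=True)).astype(np.int32)

scenes = [f"FloorPlan{i}" for i in range(1, 6)]  # 5 scenes
total_processes = 8                              # more workers than scenes

if total_processes > len(scenes):
    scenes = scenes * int(ceil(total_processes / len(scenes)))
    scenes = scenes[: total_processes * (len(scenes) // total_processes)]

inds = partition_inds(len(scenes), total_processes)
for process_ind in range(total_processes):
    print(process_ind, scenes[inds[process_ind] : inds[process_ind + 1]])
```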
def next_task(
    self, force_advance_scene: bool = False
) -> Optional[NavToPartnerTask]:
    if self.max_tasks is not None and self.max_tasks <= 0:
        return None

    scene = self.sample_scene(force_advance_scene)

    if self.env is not None:
        if scene.replace("_physics", "") != self.env.scene_name.replace(
            "_physics", ""
        ):
            self.env.reset(scene_name=scene)
    else:
        self.env = self._create_environment()
        self.env.reset(scene_name=scene)

    # Rejection-sample the initial poses: retry up to 10 times until the two
    # agents start farther apart than 1.25x the success distance.
    too_close_to_target = True
    for _ in range(10):
        self.env.randomize_agent_location(agent_id=0)
        self.env.randomize_agent_location(agent_id=1)

        pose1 = self.env.agent_state(0)
        pose2 = self.env.agent_state(1)

        dist = self.env.distance_cache.find_distance(
            self.env.scene_name,
            {k: pose1[k] for k in ["x", "y", "z"]},
            {k: pose2[k] for k in ["x", "y", "z"]},
            self.env.distance_from_point_to_point,
        )
        too_close_to_target = (
            dist <= 1.25 * self.rewards_config["max_success_distance"]
        )
        if not too_close_to_target:
            break

    task_info = {
        "scene": scene,
        "initial_position1": {k: pose1[k] for k in ["x", "y", "z"]},
        "initial_position2": {k: pose2[k] for k in ["x", "y", "z"]},
        "initial_orientation1": pose1["rotation"]["y"],
        "initial_orientation2": pose2["rotation"]["y"],
        "id": "_".join(
            [scene]
            # + ["%4.2f" % pose1[k] for k in ["x", "y", "z"]]
            # + ["%4.2f" % pose1["rotation"]["y"]]
            # + ["%4.2f" % pose2[k] for k in ["x", "y", "z"]]
            # + ["%4.2f" % pose2["rotation"]["y"]]
            + ["%d" % random.randint(0, 2 ** 63 - 1)]
        ),
    }

    if too_close_to_target:
        get_logger().warning("Bad sampled episode {}".format(task_info))

    self._last_sampled_task = NavToPartnerTask(
        env=self.env,
        sensors=self.sensors,
        task_info=task_info,
        max_steps=self.max_steps,
        action_space=self._action_space,
        reward_configs=self.rewards_config,
    )

    return self._last_sampled_task
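`next_task` rejection-samples the starting poses: both agents are re-randomized up to 10 times until their geodesic distance exceeds 1.25 × `max_success_distance`, and a warning is logged only if every attempt fails. A toy sketch of the same pattern, using Euclidean distance and made-up helpers (`sample_pose`, `max_success_distance`) in place of the environment calls:

```python
import math
import random

def sample_pose():
    # Hypothetical stand-in for env.randomize_agent_location + env.agent_state.
    return {"x": random.uniform(0, 5), "y": 0.9, "z": random.uniform(0, 5)}

max_success_distance = 0.75  # placeholder reward-config value

too_close = True
for _ in range(10):
    pose1, pose2 = sample_pose(), sample_pose()
    dist = math.dist((pose1["x"], pose1["z"]), (pose2["x"], pose2["z"]))
    too_close = dist <= 1.25 * max_success_distance
    if not too_close:
        break  # keep the first pair that starts far enough apart

if too_close:
    print("Warning: could not find a well-separated start after 10 tries")
```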