def init_env_and_agent(env_config: DictConfig,
                       wrappers_config: CollectionOfConfigType,
                       max_episode_steps: int,
                       agent_config: DictConfig,
                       input_dir: str,
                       env_instance_seed: int,
                       agent_instance_seed: int) -> Tuple[BaseEnv, Policy]:
    """Build the environment (including wrappers) and agent according to the given configuration.

    :param env_config: Environment config.
    :param wrappers_config: Wrapper config.
    :param max_episode_steps: Max number of steps per episode to limit the env to.
    :param agent_config: Policies config.
    :param input_dir: Directory to load the model from.
    :param env_instance_seed: The seed for this particular env.
    :param agent_instance_seed: The seed for this particular agent.
    :return: Tuple of (instantiated environment, instantiated agent).
    """
    with SwitchWorkingDirectoryToInput(input_dir):
        env = EnvFactory(env_config, wrappers_config)()
        if not isinstance(env, TimeLimitWrapper):
            env = TimeLimitWrapper.wrap(env)
        env.set_max_episode_steps(max_episode_steps)
        env.seed(env_instance_seed)

        agent = Factory(base_type=Policy).instantiate(agent_config)
        agent.seed(agent_instance_seed)

    return env, agent

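# Usage sketch for init_env_and_agent, reusing the `load_hydra_config` test helper
# that appears further below; the overrides, config names, and input directory are
# hypothetical placeholders.
cfg = load_hydra_config('maze.conf', 'conf_rollout', {'env': 'gym_env', 'env.name': 'CartPole-v0'})
env, agent = init_env_and_agent(
    env_config=cfg.env,
    wrappers_config=cfg.wrappers,
    max_episode_steps=200,
    agent_config=cfg.policy,
    input_dir='outputs/experiment_0',  # hypothetical directory holding the trained model
    env_instance_seed=1234,
    agent_instance_seed=5678)
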
def _generate_runners(self, run_mode: RunMode) -> List[TrainingRunner]:
    """Generate training or rollout runner(s).

    :param run_mode: Run mode. See :py:class:`~maze.maze.api.RunMode`.
    :return: List of instantiated runners.
    """
    cl = ConfigurationLoader(_run_mode=run_mode,
                             _kwargs=self._auditors[run_mode].kwargs,
                             _overrides=self._auditors[run_mode].overrides,
                             _ephemeral_init_kwargs=self._auditors[run_mode].ephemeral_init_kwargs)
    cl.load()

    self._workdirs = cl.workdirs
    self._configs[run_mode] = cl.configs
    runners: List[TrainingRunner] = []

    # Change to the correct working directory (necessary due to being outside of Hydra scope).
    for workdir, config in zip(self._workdirs, self._configs[run_mode]):
        with working_directory(workdir):
            # Allow non-primitives in Hydra config.
            with omegaconf.flag_override(config, "allow_objects", True) as cfg:
                # Set up runner.
                runner = Factory(
                    base_type=TrainingRunner if run_mode == RunMode.TRAINING else RolloutRunner
                ).instantiate(cfg.runner)
                runner.setup(cfg)
                runners.append(runner)

    return runners

def __init__(self,
             algorithm_config: ESAlgorithmConfig,
             torch_policy: TorchPolicy,
             shared_noise: SharedNoiseTable,
             normalization_stats: Optional[Dict[str, Tuple[np.ndarray, np.ndarray]]]) -> None:
    super().__init__(algorithm_config)

    # --- training setup ---
    self.model_selection: Optional[ModelSelectionBase] = None
    self.policy: Union[Policy, TorchModel] = torch_policy

    self.shared_noise = shared_noise
    self.normalization_stats = normalization_stats

    # setup the optimizer, now that the policy is available
    self.optimizer = Factory(Optimizer).instantiate(algorithm_config.optimizer)
    self.optimizer.setup(self.policy)

    # prepare statistics collection
    self.eval_stats = LogStatsAggregator(LogStatsLevel.EPOCH, get_stats_logger("eval"))
    self.train_stats = LogStatsAggregator(LogStatsLevel.EPOCH, get_stats_logger("train"))

    # injection of ES-specific events
    self.es_events = self.train_stats.create_event_topic(ESEvents)

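# A minimal sketch of the `algorithm_config.optimizer` node consumed by the Factory
# call above. The target path and step size are assumptions for illustration, not
# verified against the library.
optimizer_config = {
    '_target_': 'maze.train.trainers.es.optimizers.adam.Adam',  # hypothetical target path
    'step_size': 0.01,
}
optimizer = Factory(Optimizer).instantiate(optimizer_config)
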
def __init__(self, theta_threshold_radians: float, x_threshold: float,
             reward_aggregator: RewardAggregatorInterface):
    super().__init__()

    self.theta_threshold_radians = theta_threshold_radians
    self.x_threshold = x_threshold

    # init pubsub for event to reward routing
    self.pubsub = Pubsub(self.context.event_service)

    # KPI calculation
    self.kpi_calculator = CartPoleKpiCalculator()

    # init reward and register it with pubsub
    self.reward_aggregator = Factory(RewardAggregatorInterface).instantiate(reward_aggregator)
    self.pubsub.register_subscriber(self.reward_aggregator)

    # setup environment
    self.cart_position = None
    self.cart_velocity = None
    self.pole_angle = None
    self.pole_velocity = None
    self.env_rng: Optional[np.random.RandomState] = None
    self.seed(None)
    self._setup_env()

    # initialize rendering
    self.renderer = CartPoleRenderer(pole_length=self.length, x_threshold=self.x_threshold)

def __init__(self,
             observation_spaces_dict: Dict[Union[str, int], spaces.Dict],
             action_spaces_dict: Dict[Union[str, int], spaces.Dict],
             networks: CollectionOfConfigType):
    super().__init__(observation_spaces_dict, action_spaces_dict)
    assert len(networks) == 1
    network = networks[0]

    flat_action_space = flat_structured_space(self._action_spaces_dict)
    obs_shapes_flat = flat_structured_shapes(self._obs_shapes)

    # Infer the critic output shapes. When all action heads in a given step are discrete, the discrete version
    # of the state-action critic is used, which outputs a value for each possible action (per action head).
    # Otherwise the more general version is used, which returns one value for a given state and action.
    critic_output_shapes = dict()
    if all(self._only_discrete_spaces.values()):
        for act_key, act_space in flat_action_space.spaces.items():
            critic_output_shapes[act_key + '_q_values'] = (act_space.n,)
    else:
        for act_key, act_space in flat_action_space.spaces.items():
            if isinstance(act_space, spaces.Discrete):
                obs_shapes_flat[act_key] = (act_space.n,)
            else:
                obs_shapes_flat[act_key] = act_space.sample().shape
        critic_output_shapes['q_value'] = (1,)

    # initialize critic
    model_registry = Factory(base_type=nn.Module)
    self._critics = {0: model_registry.instantiate(network,
                                                   obs_shapes=obs_shapes_flat,
                                                   output_shapes=critic_output_shapes)}

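# Worked example of the shape inference above, assuming a single sub-step with two
# discrete action heads `move` (n=4) and `grab` (n=2): since all heads are discrete,
# `critic_output_shapes` becomes {'move_q_values': (4,), 'grab_q_values': (2,)} and
# the observation shapes stay untouched. With at least one continuous head, the
# action heads are instead appended to `obs_shapes_flat` as critic inputs and the
# network outputs a single 'q_value' of shape (1,).
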
def test_init_cartpole_rllib_model():
    """Test the init methods."""
    hydra_overrides = {'rllib/runner': 'dev', 'model': 'rllib'}
    cfg = load_hydra_config('maze.conf', 'conf_rllib', hydra_overrides)

    runner = Factory(base_type=MazeRLlibRunner).instantiate(cfg.runner)
    runner.setup(cfg)
    ray_config, rllib_config, tune_config = runner.ray_config, runner.rllib_config, runner.tune_config

    assert isinstance(runner.env_factory(), CartPoleEnv)

    assert isinstance(ray_config, dict)
    assert isinstance(rllib_config, dict)
    assert isinstance(tune_config, dict)

    assert rllib_config['env'] == 'maze_env'
    assert rllib_config['framework'] == 'torch'
    assert rllib_config['num_workers'] == 1

    for k, v in rllib_config['model'].items():
        if v == "DEPRECATED_VALUE":
            v = DEPRECATED_VALUE
        assert k in MODEL_DEFAULTS, f'Maze RLlib model parameter \'{k}\' not in RLlib MODEL_DEFAULTS ' \
                                    f'(rllib version: {ray.__version__})'
        assert MODEL_DEFAULTS[k] == v, f'RLlib default for key \'{k}\' (\'{MODEL_DEFAULTS[k]}\') does not match ' \
                                       f'the Maze-defined config value \'{v}\' (rllib version: {ray.__version__})'

    if 'ObservationNormalizationWrapper' in cfg.wrappers:
        assert os.path.exists(cfg.wrappers.ObservationNormalizationWrapper.statistics_dump)
        os.remove(cfg.wrappers.ObservationNormalizationWrapper.statistics_dump)

def _initialize_normalization_strategies(self) -> None:
    """Initialize normalization strategies for all sub-steps and all dictionary observations."""

    # iterate sub-steps
    for sub_step_key, sub_space in self._original_observation_spaces_dict.items():
        assert isinstance(sub_space, gym.spaces.Dict), "Only gym.spaces.Dict are supported as of now!"

        # iterate keys of dict observation space
        for obs_key in sub_space.spaces.keys():
            if obs_key in self.exclude:
                continue

            # start out with default values
            normalization_strategy = self.default_strategy
            strategy_config = copy.copy(self.default_strategy_config)
            statistics = self.default_statistics

            # check if statistics have been computed and dumped
            if obs_key in self.loaded_stats:
                statistics = self.loaded_stats[obs_key]

            # check if a manual config is specified
            if self._has_manual_config(obs_key):
                manual_obs_config = self.manual_config[obs_key]

                normalization_strategy = manual_obs_config.get("strategy", normalization_strategy)
                statistics = manual_obs_config.get("statistics", statistics)
                strategy_config.update(manual_obs_config.get("strategy_config", dict()))

            # build normalization strategy
            strategy = Factory(ObservationNormalizationStrategy).instantiate({
                "_target_": normalization_strategy,
                "observation_space": sub_space[obs_key],
                **strategy_config
            })

            # update the observation space accordingly
            if statistics is not None and obs_key not in self.exclude:
                strategy.set_statistics(statistics)
                self.observation_spaces_dict[sub_step_key].spaces[obs_key] = strategy.normalized_space()

            self._normalization_strategies[obs_key] = strategy

    # make sure that everything has been applied properly
    if self.manual_config is not None:
        self._check_manual_config()

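# A minimal sketch of a `manual_config` entry as read by the code above: the
# per-observation keys "strategy", "statistics" and "strategy_config" override the
# defaults. The observation name, strategy target, and config values below are
# hypothetical placeholders.
manual_config = {
    'observation_inventory': {
        'strategy': 'my_project.normalization.RangeZeroOneNormalizationStrategy',  # hypothetical target
        'statistics': {'min': 0.0, 'max': 100.0},
        'strategy_config': {'clip_range': (0.0, 1.0)},
    }
}
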
def __init__(self,
             core_env: Union[CoreEnv, dict],
             action_conversion: CollectionOfConfigType,
             observation_conversion: CollectionOfConfigType):
    super().__init__(
        core_env=Factory(CartPoleCoreEnvironment).instantiate(core_env),
        action_conversion_dict=Factory(ActionConversionInterface).instantiate_collection(action_conversion),
        observation_conversion_dict=Factory(ObservationConversionInterface).instantiate_collection(
            observation_conversion))

class DefaultPolicy(Policy):
    """Encapsulates one or more policies identified by policy IDs.

    :param policies: Dict of policy IDs and corresponding policies.
    """

    def __init__(self, policies: CollectionOfConfigType):
        self.policies = Factory(FlatPolicy).instantiate_collection(policies)

    @override(Policy)
    def needs_state(self) -> bool:
        """This policy does not require the state() object to compute the action."""
        return False

    @override(Policy)
    def seed(self, seed: int) -> None:
        """Not applicable, since the global seed should already be set before initializing the models."""
        pass

    @override(Policy)
    def compute_action(self,
                       observation: ObservationType,
                       maze_state: Optional[MazeStateType] = None,
                       env: Optional[BaseEnv] = None,
                       actor_id: Optional[ActorID] = None,
                       deterministic: bool = False) -> ActionType:
        """Implementation of the :class:`~maze.core.agent.policy.Policy` interface."""
        return self.policy_for(actor_id).compute_action(observation, deterministic=deterministic)

    def policy_for(self, actor_id: Optional[ActorID]) -> FlatPolicy:
        """Return the policy corresponding to the given actor ID (or the single available policy
        if no actor ID is provided).

        :param actor_id: Actor ID to get the policy for.
        :return: Flat policy corresponding to the actor ID.
        """
        if actor_id is None:
            assert len(self.policies.items()) == 1, "no policy ID provided but multiple policies are available"
            return list(self.policies.values())[0]
        else:
            return self.policies[actor_id.step_key]

    @override(Policy)
    def compute_top_action_candidates(self,
                                      observation: ObservationType,
                                      num_candidates: Optional[int],
                                      maze_state: Optional[MazeStateType],
                                      env: Optional[BaseEnv],
                                      actor_id: Optional[ActorID] = None) \
            -> Tuple[Sequence[ActionType], Sequence[float]]:
        """Implementation of the :class:`~maze.core.agent.policy.Policy` interface."""
        raise NotImplementedError

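# Usage sketch: `instantiate_collection` turns a dict of config nodes into a dict
# of FlatPolicy instances keyed by step key, which `policy_for` then indexes with
# `actor_id.step_key`. The target path and observation below are hypothetical.
policy = DefaultPolicy({
    0: {'_target_': 'my_project.policies.HeuristicCartPolePolicy'}  # hypothetical flat policy
})
action = policy.compute_action(observation={'observation': np.zeros(4)}, deterministic=True)
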
def setup(self, cfg: DictConfig) -> None:
    """See :py:meth:`~maze.train.trainers.common.training_runner.TrainingRunner.setup`."""
    super().setup(cfg)

    # initialize distributed env
    envs = self.create_distributed_env(self.env_factory, self.concurrency, logging_prefix="train")
    train_env_instance_seeds = [self.maze_seeding.generate_env_instance_seed() for _ in range(self.concurrency)]
    envs.seed(train_env_instance_seeds)

    # initialize actor critic model
    model = TorchActorCritic(
        policy=self._model_composer.policy,
        critic=self._model_composer.critic,
        device=cfg.algorithm.device)

    # initialize best model selection
    self._model_selection = BestModelSelection(dump_file=self.state_dict_dump_file, model=model,
                                               dump_interval=self.dump_interval)

    # initialize the evaluation env and enable statistics collection
    evaluator = None
    if cfg.algorithm.rollout_evaluator.n_episodes > 0:
        eval_env = self.create_distributed_env(self.env_factory, self.eval_concurrency, logging_prefix="eval")
        eval_env_instance_seeds = [self.maze_seeding.generate_env_instance_seed()
                                   for _ in range(self.eval_concurrency)]
        eval_env.seed(eval_env_instance_seeds)

        # initialize rollout evaluator
        evaluator = Factory(base_type=RolloutEvaluator).instantiate(cfg.algorithm.rollout_evaluator,
                                                                    eval_env=eval_env,
                                                                    model_selection=self._model_selection)

    # look up trainer class
    trainer_class = Factory(base_type=ActorCritic).type_from_name(self.trainer_class)

    # initialize trainer (from input directory)
    self._trainer = trainer_class(
        algorithm_config=cfg.algorithm,
        rollout_generator=RolloutGenerator(env=envs),
        evaluator=evaluator,
        model=model,
        model_selection=self._model_selection
    )
    self._init_trainer_from_input_dir(trainer=self._trainer,
                                      state_dict_dump_file=self.state_dict_dump_file,
                                      input_dir=cfg.input_dir)

def __init__(self,
             action_spaces_dict: Dict[StepKeyType, gym.spaces.Dict],
             observation_spaces_dict: Dict[StepKeyType, gym.spaces.Dict],
             agent_counts_dict: Dict[StepKeyType, int],
             distribution_mapper_config: ConfigType,
             policy: ConfigType,
             critic: ConfigType):
    super().__init__(action_spaces_dict, observation_spaces_dict, agent_counts_dict, distribution_mapper_config)

    # init policy composer
    self._policy_composer = Factory(BasePolicyComposer).instantiate(
        policy,
        action_spaces_dict=self.action_spaces_dict,
        observation_spaces_dict=self.observation_spaces_dict,
        agent_counts_dict=self.agent_counts_dict,
        distribution_mapper=self._distribution_mapper)

    self.critic_input_spaces_dict = self._build_critic_input_space_dict()

    # init critic composer
    self._critics_composer = None
    if critic is not None:
        critic_type = Factory(CriticComposerInterface).type_from_name(critic['_target_']) \
            if isinstance(critic, Mapping) else type(critic)
        if issubclass(critic_type, SharedStateCriticComposer):
            assert self.critic_input_spaces_dict == self.observation_spaces_dict, \
                'Shared embedding is not yet supported for shared state critics'
        if issubclass(critic_type, BaseStateCriticComposer):
            self._critics_composer = Factory(BaseStateCriticComposer).instantiate(
                critic,
                observation_spaces_dict=self.critic_input_spaces_dict,
                agent_counts_dict=self.agent_counts_dict)
        elif issubclass(critic_type, BaseStateActionCriticComposer):
            assert self.critic_input_spaces_dict == self.observation_spaces_dict, \
                'Shared embedding is not yet supported for state-action critics'
            self._critics_composer = Factory(BaseStateActionCriticComposer).instantiate(
                critic,
                observation_spaces_dict=self.critic_input_spaces_dict,
                action_spaces_dict=self.action_spaces_dict)
        else:
            raise ValueError(f"Critic of type {critic_type} not supported!")

    # save model graphs to pdf
    self.save_models()

def load_replay_buffer(self, replay_buffer: BaseReplayBuffer, cfg: DictConfig) -> None:
    """Load the given trajectories as a dataset and fill the buffer with these trajectories.

    :param replay_buffer: The replay buffer to fill.
    :param cfg: The dict config of the experiment.
    """
    print(f'******* Starting to fill the replay buffer with trajectories from path: '
          f'{self.initial_demonstration_trajectories.input_data} *******')
    with SwitchWorkingDirectoryToInput(cfg.input_dir):
        dataset = Factory(base_type=Dataset).instantiate(self.initial_demonstration_trajectories,
                                                         conversion_env_factory=self.env_factory)

    assert isinstance(dataset, InMemoryDataset), 'Only in-memory datasets are supported at this point'

    if cfg.algorithm.split_rollouts_into_transitions:
        for step_record in dataset.step_records:
            assert step_record.next_observations is not None, "Next observations are required for SAC"
            assert all(map(lambda x: x is not None, step_record.next_observations)), \
                "Next observations are required for SAC"
            replay_buffer.add_transition(step_record)
    else:
        for idx, trajectory_reference in enumerate(dataset.trajectory_references):
            traj = SpacesTrajectoryRecord(id=idx)
            traj.step_records = dataset.step_records[trajectory_reference]
            replay_buffer.add_transition(traj)

def test_parallel_data_load_from_directory_clipped_from_hydra():
    """Test loading trajectories of multiple episodes in parallel into an in-memory dataset.
    (Each data-loader process reads the files assigned to it.)"""

    # run a short rollout to generate trajectory data
    rollout_config = {
        "configuration": "test",
        "env": "gym_env",
        "env.name": "CartPole-v0",
        "policy": "random_policy",
        "runner": "sequential",
        "runner.n_episodes": 2,
        "runner.max_episode_steps": 20,
        "seeding.env_base_seed": 12345,
        "seeding.agent_base_seed": 12345,
    }
    run_maze_job(rollout_config, config_module="maze.conf", config_name="conf_rollout")

    hydra_config = {
        '_target_': 'maze.core.trajectory_recording.datasets.in_memory_dataset.InMemoryDataset',
        'n_workers': 2,
        'conversion_env_factory': lambda: make_gym_maze_env("CartPole-v0"),
        'input_data': 'trajectory_data',
        'deserialize_in_main_thread': False,
        'trajectory_processor': {
            '_target_': 'maze.core.trajectory_recording.datasets.trajectory_processor.'
                        'DeadEndClippingTrajectoryProcessor',
            'clip_k': 2
        }
    }

    dataset = Factory(InMemoryDataset).instantiate(hydra_config)
    assert len(dataset) == 11 + 17

def check_env_and_model_instantiation(config_module: str, config: str, overrides: Dict[str, str]) -> None:
    """Check that env (and, if configured, model) instantiation works."""
    with initialize_config_module(config_module):
        # config is relative to a module
        cfg = compose(config, overrides=[key + "=" + value for key, value in overrides.items()])

        env_factory = EnvFactory(cfg.env, cfg.wrappers if "wrappers" in cfg else {})
        env = env_factory()
        assert env is not None
        assert isinstance(env, (StructuredEnv, StructuredEnvSpacesMixin))

        if 'model' in overrides and overrides['model'] == 'rllib':
            return

        if 'model' in cfg:
            model_composer = Factory(BaseModelComposer).instantiate(
                cfg.model,
                action_spaces_dict=env.action_spaces_dict,
                observation_spaces_dict=env.observation_spaces_dict,
                agent_counts_dict=env.agent_counts_dict)

            for pp in model_composer.policy.networks.values():
                assert isinstance(pp, nn.Module)

            if model_composer.critic:
                for cc in model_composer.critic.networks.values():
                    assert isinstance(cc, nn.Module)

def __init__(self,
             in_keys: Union[str, List[str]],
             out_keys: Union[str, List[str]],
             in_shapes: Union[Sequence[int], List[Sequence[int]]],
             hidden_size: int,
             num_layers: int,
             bidirectional: bool,
             non_lin: Union[str, type(nn.Module)]):
    super().__init__(in_keys=in_keys, out_keys=out_keys, in_shapes=in_shapes, in_num_dims=3, out_num_dims=3)

    self.input_units = self.in_shapes[0][-1]
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.bidirectional = bidirectional
    self.non_lin = Factory(base_type=nn.Module).type_from_name(non_lin)
    self.output_units = 2 * self.hidden_size if self.bidirectional else self.hidden_size

    # compile network
    self.net = nn.LSTM(input_size=self.input_units, hidden_size=self.hidden_size,
                       num_layers=self.num_layers, bidirectional=self.bidirectional, batch_first=True)

    self.final_dense = nn.Sequential(
        nn.Linear(in_features=self.output_units, out_features=self.output_units),
        self.non_lin())

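# A small sketch of the non-linearity lookup used above: `type_from_name` maps a
# fully qualified class name to the class object (classes pass through unchanged,
# as its use with dotted-path strings elsewhere in this corpus suggests), so the
# block can instantiate fresh activation modules via `self.non_lin()`.
non_lin_cls = Factory(base_type=nn.Module).type_from_name('torch.nn.Tanh')
activation = non_lin_cls()  # equivalent to nn.Tanh()
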
def __init__(self,
             in_keys: Union[str, List[str]],
             out_keys: Union[str, List[str]],
             in_shapes: Union[Sequence[int], List[Sequence[int]]],
             hidden_channels: List[int],
             hidden_kernels: List[Union[int, Tuple[int, ...]]],
             non_lin: Union[str, type(nn.Module)],
             convolution_dimension: int,
             hidden_strides: Optional[List[Union[int, Tuple[int, ...]]]],
             hidden_dilations: Optional[List[Union[int, Tuple[int, ...]]]],
             hidden_padding: Optional[List[Union[int, Tuple[int, ...]]]],
             padding_mode: Optional[str]):
    assert convolution_dimension in [1, 2, 3]
    if convolution_dimension == 1:
        self.convolution_nn = nn.Conv1d
        in_out_num_dims = 3
        for hk in hidden_kernels:
            assert isinstance(hk, int)
    elif convolution_dimension == 2:
        self.convolution_nn = nn.Conv2d
        in_out_num_dims = 4
        for hk in hidden_kernels:
            assert isinstance(hk, int) or len(hk) == 2
    else:
        self.convolution_nn = nn.Conv3d
        in_out_num_dims = 5
        for hk in hidden_kernels:
            assert isinstance(hk, int) or len(hk) == 3

    super().__init__(in_keys=in_keys, out_keys=out_keys, in_shapes=in_shapes,
                     in_num_dims=in_out_num_dims, out_num_dims=in_out_num_dims)

    self.convolutional_dim = convolution_dimension
    self.input_channels = self.in_shapes[0][-(in_out_num_dims - 1)]
    self.hidden_channels = hidden_channels
    self.hidden_kernels = hidden_kernels
    self.non_lin = Factory(base_type=nn.Module).type_from_name(non_lin)
    self.output_channels = self.hidden_channels[-1]

    # Optional arguments
    num_layers = len(self.hidden_channels)
    self.hidden_strides = hidden_strides if hidden_strides is not None else [1 for _ in range(num_layers)]
    self.hidden_dilations = hidden_dilations if hidden_dilations is not None else [1 for _ in range(num_layers)]
    self.hidden_padding = hidden_padding if hidden_padding is not None else [0 for _ in range(num_layers)]
    self.padding_mode = padding_mode if padding_mode is not None else 'zeros'

    # checks
    assert self.padding_mode in ['zeros', 'reflect', 'replicate', 'circular']
    assert len(self.hidden_channels) == len(self.hidden_kernels)
    assert len(self.hidden_channels) == len(self.hidden_strides)
    assert len(self.hidden_channels) == len(self.hidden_dilations)

    # compile layer dictionary
    layer_dict = self.build_layer_dict()

    # compile network
    self.net = nn.Sequential(layer_dict)

def __call__(self, *args, **kwargs) -> MazeEnv:
    """Environment factory.

    :return: Newly created environment instance.
    """
    env = Factory(MazeEnv).instantiate(self.env)
    env = WrapperFactory.wrap_from_config(env, self.wrappers)
    return env

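# Usage sketch: as in check_env_and_model_instantiation above, the factory bundles
# env and wrapper configs into a single callable that builds a fresh wrapped env
# on every call (e.g. one env per worker in distributed rollouts); `cfg` is a
# composed Hydra config.
env_factory = EnvFactory(cfg.env, cfg.wrappers)
env = env_factory()
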
def test_cartpole_model_composer():
    path_to_model_config = code_snippets.__path__._path[0] + '/custom_complex_net.yaml'
    with open(path_to_model_config, 'r') as f:
        model_config = yaml.safe_load(f)
    model_composer = Factory(base_type=BaseModelComposer).instantiate(
        model_config,
        action_spaces_dict=_mock_action_spaces_dict(),
        observation_spaces_dict=_mock_observation_spaces_dict(),
        agent_counts_dict=_mock_agent_counts_dict())

def test_init_cartpole_maze_model():
    """Test the init methods."""
    hydra_overrides = {
        'rllib/runner': 'dev',
        'configuration': 'test',
        'env': 'gym_env',
        'model': 'vector_obs',
        'wrappers': 'vector_obs',
        'critic': 'template_state'
    }
    cfg = load_hydra_config('maze.conf', 'conf_rllib', hydra_overrides)

    runner = Factory(base_type=MazeRLlibRunner).instantiate(cfg.runner)
    runner.setup(cfg)
    ray_config, rllib_config, tune_config = runner.ray_config, runner.rllib_config, runner.tune_config

    assert isinstance(runner.env_factory(), CartPoleEnv)
    assert issubclass(_global_registry.get(RLLIB_ACTION_DIST, 'maze_dist'), MazeRLlibActionDistribution)
    assert issubclass(_global_registry.get(RLLIB_MODEL, 'maze_model'), MazeRLlibPolicyModel)

    assert isinstance(ray_config, dict)
    assert isinstance(rllib_config, dict)
    assert isinstance(tune_config, dict)

    assert rllib_config['env'] == 'maze_env'
    assert rllib_config['framework'] == 'torch'
    assert rllib_config['num_workers'] == 1

    model_config = rllib_config['model']
    assert model_config['custom_action_dist'] == 'maze_dist'
    assert model_config['custom_model'] == 'maze_model'
    assert model_config['vf_share_layers'] is False
    assert model_config['custom_model_config']['maze_model_composer_config'] == cfg.model
    assert model_config['custom_model_config']['spaces_config_dump_file'] == cfg.runner.spaces_config_dump_file

    if 'ObservationNormalizationWrapper' in cfg.wrappers:
        assert os.path.exists(cfg.wrappers.ObservationNormalizationWrapper.statistics_dump)
        os.remove(cfg.wrappers.ObservationNormalizationWrapper.statistics_dump)

def __init__(self, action_space: spaces.Dict, distribution_mapper_config: CollectionOfConfigType):
    self.action_space = action_space

    # mapping of action heads to distributions and configs
    self._action_head_to_distribution: Dict[str, Tuple[type(TorchProbabilityDistribution),
                                                       Dict[str, Any]]] = dict()

    # first: apply default config to action heads
    for action_head, sub_action_space in action_space.spaces.items():
        space_type = type(sub_action_space)
        dist_type: TorchProbabilityDistribution = self.default_mapping[space_type]
        self._action_head_to_distribution[action_head] = (dist_type, {})

    # second: parse custom mappings
    for entry_dict in distribution_mapper_config:
        assert "distribution" in entry_dict
        assert ("action_space" in entry_dict and "action_head" not in entry_dict) or \
               ("action_space" not in entry_dict and "action_head" in entry_dict)

        # get the distribution type
        distribution_type = Factory(TorchProbabilityDistribution).type_from_name(entry_dict["distribution"])

        # get additional distribution arguments
        args = entry_dict["args"] if "args" in entry_dict else {}

        if "action_head" in entry_dict:
            self._action_head_to_distribution[entry_dict["action_head"]] = (distribution_type, args)
        elif "action_space" in entry_dict:
            sub_action_space = Factory(spaces.Space).type_from_name(entry_dict["action_space"])
            for action_head in self.action_space.spaces:
                if isinstance(self.action_space[action_head], sub_action_space):
                    self._action_head_to_distribution[action_head] = (distribution_type, args)

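# A minimal sketch of a `distribution_mapper_config` as parsed above: each entry
# names a distribution and either an `action_space` type or an `action_head`
# (never both), plus optional `args`. The distribution target paths and head name
# below are assumptions for illustration.
distribution_mapper_config = [
    # map all Box action spaces to a (hypothetical) squashed Gaussian distribution
    {"distribution": "maze.distributions.squashed_gaussian.SquashedGaussianProbabilityDistribution",
     "action_space": "gym.spaces.Box"},
    # override a single action head and pass extra distribution arguments
    {"distribution": "maze.distributions.categorical.CategoricalProbabilityDistribution",
     "action_head": "my_discrete_head",  # hypothetical head name
     "args": {}},
]
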
def test_cartpole_model_composer():
    env = GymMazeEnv(env='CartPole-v0')
    path_to_model_config = code_snippets.__path__._path[0] + '/custom_plain_cartpole_net.yaml'
    with open(path_to_model_config, 'r') as f:
        model_config = yaml.safe_load(f)
    model_composer = Factory(base_type=BaseModelComposer).instantiate(
        model_config,
        action_spaces_dict=env.action_spaces_dict,
        observation_spaces_dict=env.observation_spaces_dict,
        agent_counts_dict=env.agent_counts_dict)

def __init__(self,
             observation_spaces_dict: Dict[Union[str, int], spaces.Dict],
             agent_counts_dict: Dict[StepKeyType, int],
             networks: CollectionOfConfigType):
    super().__init__(observation_spaces_dict, agent_counts_dict)

    # initialize critics
    model_registry = Factory(base_type=nn.Module)
    networks = list_to_dict(networks)
    self._critics = dict()
    for idx, (key, net_config) in enumerate(networks.items()):
        step_obs_shapes = self._obs_shapes[key]
        if idx > 0:
            step_obs_shapes = {**step_obs_shapes, self.prev_value_key: self.prev_value_shape}
        self._critics[key] = model_registry.instantiate(net_config, obs_shapes=step_obs_shapes)

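# Sketch of a `networks` collection config for the critic composer above: a list
# is converted to a dict keyed by sub-step index via `list_to_dict`, and every
# entry must resolve to an nn.Module whose constructor accepts `obs_shapes`.
# The target path is a hypothetical placeholder.
networks = [
    {'_target_': 'my_project.models.CriticNet'},  # sub-step 0
    {'_target_': 'my_project.models.CriticNet'},  # sub-step 1 (additionally receives the previous value input)
]
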
def __init__(self,
             observation_spaces_dict: Dict[StepKeyType, spaces.Dict],
             agent_counts_dict: Dict[StepKeyType, int],
             networks: ConfigType,
             stack_observations: bool):
    super().__init__(observation_spaces_dict, agent_counts_dict)
    assert len(networks) == 1
    self.stack_observations = stack_observations
    network = networks[0]

    obs_shapes_flat = self._obs_shapes
    if self.stack_observations:
        obs_shapes_flat = stacked_shapes(obs_shapes_flat, self._agent_counts_dict)
    obs_shapes_flat = flat_structured_shapes(obs_shapes_flat)
    self._obs_shapes = {0: obs_shapes_flat}

    # initialize critic
    model_registry = Factory(base_type=nn.Module)
    self._critics = {0: model_registry.instantiate(network, obs_shapes=obs_shapes_flat)}

def __init__(self,
             input_data: Optional[Union[str, Path, List[Union[str, Path]]]],
             conversion_env_factory: Optional[Callable],
             n_workers: int,
             trajectory_processor: Union[TrajectoryProcessor, ConfigType],
             deserialize_in_main_thread: bool):
    self._conversion_env_factory = conversion_env_factory
    self._conversion_env = self._conversion_env_factory() if self._conversion_env_factory else None
    self.n_workers = n_workers
    self._trajectory_processor = Factory(TrajectoryProcessor).instantiate(trajectory_processor)
    self._deserialize_in_main_thread = deserialize_in_main_thread

    self.step_records = []
    self.trajectory_references = []
    self.reporting_queue = None

    if input_data is not None:
        self.load_data(input_data)

def _run_job(cfg: DictConfig) -> None:
    """Runs a regular maze job.

    :param cfg: Hydra configuration for the rollout.
    """
    set_matplotlib_backend()

    # If no env or agent base seed is given, generate the seeds randomly and add them to the resolved hydra config.
    if cfg.seeding.env_base_seed is None:
        cfg.seeding.env_base_seed = MazeSeeding.generate_seed_from_random_state(np.random.RandomState(None))
    if cfg.seeding.agent_base_seed is None:
        cfg.seeding.agent_base_seed = MazeSeeding.generate_seed_from_random_state(np.random.RandomState(None))

    # print and log config
    config_str = yaml.dump(OmegaConf.to_container(cfg, resolve=True), sort_keys=False)
    with open("hydra_config.yaml", "w") as fp:
        fp.write("\n" + config_str)
    BColors.print_colored(config_str, color=BColors.HEADER)
    print("Output directory: {}\n".format(os.path.abspath(".")))

    # run the job
    runner = Factory(base_type=Runner).instantiate(cfg.runner)
    runner.setup(cfg)
    runner.run()

def __init__(self,
             in_keys: Union[str, List[str]],
             out_keys: Union[str, List[str]],
             in_shapes: Union[Sequence[int], List[Sequence[int]]],
             hidden_features: List[int],
             non_lins: Union[str, type(nn.Module), List[str], List[type(nn.Module)]],
             n_heads: Union[int, List[int]],
             attention_alpha: Union[List[float], float],
             avg_last_head_attentions: bool,
             attention_dropout: Union[float, List[float]]):
    super().__init__(in_keys=in_keys, out_keys=out_keys, in_shapes=in_shapes, in_num_dims=[3, 3], out_num_dims=3)

    # Assertions
    assert len(self.in_keys) == 2, 'There should be two input keys: feature matrix + adjacency matrix'
    assert self.in_shapes[0][-2] == self.in_shapes[1][-1], \
        'The node dimension of the feature matrix should be the same as the adjacency matrix\'s rows and ' \
        f'columns: {self.in_shapes}'
    assert self.in_shapes[1][-1] == self.in_shapes[1][-2], 'The adjacency matrix has to be a square matrix'

    self.avg_last_head_attentions = avg_last_head_attentions

    # Specify dummy dict creation function for the adjacency matrix
    self.dummy_dict_creators[1] = self._dummy_symmetric_adj_tensor_factory(self.in_shapes[1])

    # Init class objects
    self.input_features = self.in_shapes[0][-1]
    self.hidden_features = hidden_features

    # Create list of heads for each layer
    self.n_heads: List[int] = n_heads if isinstance(n_heads, list) else [n_heads] * len(self.hidden_features)

    # The output features of this block are equal to the last specified hidden features if
    # :param avg_last_head_attentions is set to true; otherwise the outputs of the last heads are concatenated,
    # so the output size is the number of last hidden features times the last specified number of heads.
    self.output_features = self.hidden_features[-1] if self.avg_last_head_attentions \
        else self.hidden_features[-1] * self.n_heads[-1]

    # Create list of non-linearities for each layer
    non_lins = non_lins if isinstance(non_lins, list) else [non_lins] * len(self.hidden_features)
    self.non_lins: List[type(nn.Module)] = [Factory(base_type=nn.Module).type_from_name(non_lin)
                                            for non_lin in non_lins]

    # Create list of dropout rates for each layer
    self.attention_dropout = attention_dropout if isinstance(attention_dropout, list) \
        else [attention_dropout] * len(self.hidden_features)

    # Create list of alpha values for each layer
    self.attention_alpha = attention_alpha if isinstance(attention_alpha, list) \
        else [attention_alpha] * len(self.hidden_features)

    # compile layer dictionary
    layer_dict = self.build_layer_dict()

    # compile network
    self.net = nn.Sequential(layer_dict)

def __init__(self,
             observation_spaces_dict: Dict[Union[str, int], spaces.Dict],
             agent_counts_dict: Dict[StepKeyType, int],
             networks: CollectionOfConfigType):
    super().__init__(observation_spaces_dict, agent_counts_dict)

    # initialize critics
    networks = list_to_dict(networks)
    self._critics = {key: Factory(base_type=nn.Module).instantiate(networks[key],
                                                                   obs_shapes=self._obs_shapes[key])
                     for key in networks.keys()}

def __init__(self, env: MazeEnv, plot_function: Optional[str]):
    """Avoid calling this constructor directly, use :method:`wrap` instead."""
    super().__init__(env)

    # create event topics
    self.observation_events = self.core_env.context.event_service.create_event_topic(
        ObservationVisualizationEvents)

    # update the plot function
    if plot_function is not None:
        function = Factory(Callable).type_from_name(plot_function)
        ObservationVisualizationEvents.observation_to_visualize.tensorboard_render_figure_dict[None] = function

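# Usage sketch, assuming this constructor belongs to an observation-visualization
# wrapper class (here called ObservationVisualizationWrapper) exposing the inherited
# `wrap` classmethod mentioned in the docstring above; `plot_function` is a dotted
# path resolved via `Factory(Callable).type_from_name`. The module path below is a
# hypothetical placeholder.
wrapped_env = ObservationVisualizationWrapper.wrap(
    env, plot_function='my_project.visualization.plot_observation')
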
def __init__(self,
             in_keys: Union[str, List[str]],
             out_keys: Union[str, List[str]],
             in_shapes: Union[Sequence[int], List[Sequence[int]]],
             hidden_channels: List[int],
             non_lin: Union[str, type(nn.Module)]):
    super().__init__(in_keys=in_keys, out_keys=out_keys, in_shapes=in_shapes, in_num_dims=4, out_num_dims=4)
    self.input_channels = self.in_shapes[0][-3]
    self.hidden_channels = hidden_channels
    self.non_lin = Factory(base_type=nn.Module).type_from_name(non_lin)
    self.output_channels = self.hidden_channels[-1]

    # compile layer dictionary
    layer_dict = self.build_layer_dict()

    # compile network
    self.net = nn.Sequential(layer_dict)

def _initialize_preprocessors(self) -> None:
    """Initialize pre-processors for all sub-steps and all dictionary observations."""

    # get full flat observation space
    observation_spaces = flat_structured_space(self.observation_spaces_dict).spaces

    # maintain a list of temporary spaces
    temporary_spaces = []

    # iterate pre-processor config
    for mapping in self.pre_processor_mapping:
        obs_key = mapping["observation"]
        assert obs_key in observation_spaces, f"Observation {obs_key} not contained in observation space."

        pre_processor_cls = Factory(PreProcessor).type_from_name(mapping["_target_"])
        assert isinstance(mapping["config"], Mapping), \
            f"Make sure that the config for {pre_processor_cls.__name__} of observation {obs_key} is a dict!"
        processor = pre_processor_cls(observation_space=observation_spaces[obs_key], **mapping["config"])
        self._preprocessors.append((obs_key, processor, mapping["keep_original"]))

        # append processed space
        tag = f"{obs_key}-{processor.tag()}"
        observation_spaces[tag] = processor.processed_space()

        # iterate all structured env sub-steps and update the observation spaces accordingly
        for sub_step_key, sub_space in self.observation_spaces_dict.items():
            # check if the sub-space is contained
            if obs_key in sub_space.spaces:
                # add the new key to the observation space
                self.observation_spaces_dict[sub_step_key].spaces[tag] = processor.processed_space()

                # remove the original key from the observation space
                if not mapping["keep_original"]:
                    temporary_spaces.append((sub_step_key, obs_key))

    # remove temporary spaces
    for sub_step_key, obs_key in temporary_spaces:
        self.observation_spaces_dict[sub_step_key].spaces.pop(obs_key)

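# A minimal sketch of a `pre_processor_mapping` entry as consumed by the loop above;
# each entry names the observation, the pre-processor `_target_`, a `config` dict for
# its constructor, and whether to keep the original observation. The observation name
# and target path are hypothetical placeholders.
pre_processor_mapping = [
    {'observation': 'rgb_image',
     '_target_': 'my_project.preprocessors.GrayScalePreProcessor',  # hypothetical pre-processor
     'keep_original': False,
     'config': {}},
]
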